From 3c1e320a55ea72d9d0df73f41ec584e19805864a Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 6 Aug 2024 11:14:35 +0800 Subject: [PATCH 001/140] [nix] bump buddy-mlir Signed-off-by: Avimitin --- nix/pkgs/buddy-mlir.nix | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/nix/pkgs/buddy-mlir.nix b/nix/pkgs/buddy-mlir.nix index 8bd03422f..abdae657a 100644 --- a/nix/pkgs/buddy-mlir.nix +++ b/nix/pkgs/buddy-mlir.nix @@ -17,8 +17,8 @@ stdenv.mkDerivation { src = fetchFromGitHub { owner = "buddy-compiler"; repo = "buddy-mlir"; - rev = "ec8a17969b645f0a0c1a822ffb04192b236b5c88"; - hash = "sha256-3ecxei/nkx8sjgVkeQvZMaxr1CQXwhTz8aY1e0I3zBA="; + rev = "be2811cde9158faa0c08ad90801edf5ebfcf8e0e"; + hash = "sha256-5ZFqDZZjMbVoqbEZ1mt1RXY2oR+VSQ6wJ1dQJCGrRC4="; }; unpackPhase = '' # We can only use one-step build now...buddy-mlir have bad build system that always @@ -34,15 +34,6 @@ stdenv.mkDerivation { nativeBuildInputs = [ cmake ninja python3 bintools ]; - prePatch = "pushd ../buddy-mlir"; - patches = [ - (fetchpatch { - url = "https://github.com/buddy-compiler/buddy-mlir/pull/357.patch"; - hash = "sha256-ysPcHAkrFJDtHmWVo35Wz6ullIGsP1EedYdJCq4fRX4="; - }) - ]; - postPatch = "popd"; - cmakeDir = "../llvm"; cmakeFlags = [ "-DCMAKE_BUILD_TYPE=Release" From fc61224f8237ad005733a891226a105479506f0d Mon Sep 17 00:00:00 2001 From: Lucas-Wye Date: Sun, 7 Jul 2024 19:32:14 +0800 Subject: [PATCH 002/140] [rtl] support zvbb --- configgen/generated/blastoise.json | 83 +------- configgen/generated/machamp.json | 83 +------- configgen/generated/psyduck.json | 191 ++++++++++++++++++ configgen/generated/sandslash.json | 131 +----------- configgen/src/Main.scala | 60 +++++- t1/src/LaneZvbb.scala | 178 ++++++++++++++++ t1/src/T1.scala | 14 +- t1/src/VectorFunctionUnit.scala | 6 +- t1/src/decoder/Decoder.scala | 25 ++- t1/src/decoder/InstructionDocumentation.scala | 17 ++ t1/src/decoder/T1DecodePattern.scala | 1 + t1/src/decoder/attribute/isCrosswrite.scala | 4 + 
t1/src/decoder/attribute/isItype.scala | 3 + t1/src/decoder/attribute/isPopcount.scala | 1 + t1/src/decoder/attribute/isScheduler.scala | 2 +- t1/src/decoder/attribute/isSreadvd.scala | 2 +- t1/src/decoder/attribute/isSwrite.scala | 6 +- t1/src/decoder/attribute/isUnsigned0.scala | 16 ++ t1/src/decoder/attribute/isUnsigned1.scala | 16 ++ t1/src/decoder/attribute/isVtype.scala | 5 + t1/src/decoder/attribute/isZvbb.scala | 50 +++++ t1/src/decoder/attribute/uop.scala | 3 +- t1/src/decoder/attribute/zvbbUop.scala | 95 +++++++++ 23 files changed, 688 insertions(+), 304 deletions(-) create mode 100644 configgen/generated/psyduck.json create mode 100644 t1/src/LaneZvbb.scala create mode 100644 t1/src/decoder/attribute/isZvbb.scala create mode 100644 t1/src/decoder/attribute/zvbbUop.scala diff --git a/configgen/generated/blastoise.json b/configgen/generated/blastoise.json index d0e26eb82..290ef86c1 100644 --- a/configgen/generated/blastoise.json +++ b/configgen/generated/blastoise.json @@ -6,86 +6,6 @@ "Zve32f" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01???????????????????????00?????\nb10???????????????????????00?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01???????????????????????01?????\nb10???????????????????????01?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01???????????????????????10?????\nb10???????????????????????10?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01???????????????????????11?????\nb10???????????????????????11?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000?????????????000?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": 
"b11000000000?????????????001?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000?????????????010?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000?????????????011?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000?????????????100?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000?????????????101?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000?????????????110?????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000?????????????111?????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 1, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", "vfuInstantiateParameter": { @@ -246,7 +166,8 @@ 3 ] ] - ] + ], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/machamp.json b/configgen/generated/machamp.json index dc0a4b2d9..ceeaf5e59 100644 --- a/configgen/generated/machamp.json +++ b/configgen/generated/machamp.json @@ -6,86 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01??????????????????????00??????\nb10??????????????????????00??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01??????????????????????01??????\nb10??????????????????????01??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01??????????????????????10??????\nb10??????????????????????10??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": 
"b01??????????????????????11??????\nb10??????????????????????11??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b11000000000????????????000??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b11000000000????????????001??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b11000000000????????????010??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b11000000000????????????011??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b11000000000????????????100??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b11000000000????????????101??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b11000000000????????????110??????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b11000000000????????????111??????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 2, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w", "vfuInstantiateParameter": { @@ -230,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/generated/psyduck.json b/configgen/generated/psyduck.json new file mode 100644 index 000000000..04a2f3572 --- /dev/null +++ b/configgen/generated/psyduck.json @@ -0,0 +1,191 @@ +{ + "parameter": { + "vLen": 512, + "dLen": 256, + "extensions": [ + "Zve32f", + "Zvbb" + ], + "t1customInstructions": [], + "vrfBankSize": 1, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rwp1rw", + "vfuInstantiateParameter": { + "slotCount": 4, + "logicModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.MaskedLogic" + }, + [ + 0, + 1, 
+ 2, + 3 + ] + ] + ], + "aluModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 0 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 1 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 2 + ] + ], + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneAdder" + }, + [ + 3 + ] + ] + ], + "shifterModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneShifter" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "mulModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 2 + }, + "generator": "org.chipsalliance.t1.rtl.LaneMul" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "divModuleParameters": [], + "divfpModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.LaneDivFP" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "otherModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "vlMaxBits": 10, + "groupNumberBits": 4, + "laneNumberBits": 3, + "dataPathByteWidth": 4, + "latency": 1 + }, + "generator": "org.chipsalliance.t1.rtl.OtherUnit" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "floatModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneFloat" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ], + "zvbbModuleParameters": [ + [ + { + "parameter": { + "datapathWidth": 32, + "latency": 3 + }, + "generator": "org.chipsalliance.t1.rtl.LaneZvbb" + }, + [ + 0, + 1, + 2, + 3 + ] + ] + ] + } + }, + "generator": "org.chipsalliance.t1.rtl.T1" +} \ No newline at end of file diff --git 
a/configgen/generated/sandslash.json b/configgen/generated/sandslash.json index 5ae0cb6b3..688085fe1 100644 --- a/configgen/generated/sandslash.json +++ b/configgen/generated/sandslash.json @@ -6,134 +6,6 @@ "Zve32x" ], "t1customInstructions": [], - "lsuBankParameters": [ - { - "name": "scalar", - "region": "b00??????????????????????????????", - "beatbyte": 8, - "accessScalar": true - }, - { - "name": "ddrBank0", - "region": "b01?????????????????????00???????\nb10?????????????????????00???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank1", - "region": "b01?????????????????????01???????\nb10?????????????????????01???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank2", - "region": "b01?????????????????????10???????\nb10?????????????????????10???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "ddrBank3", - "region": "b01?????????????????????11???????\nb10?????????????????????11???????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank0", - "region": "b1100000000?????????0000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank1", - "region": "b1100000000?????????0001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank2", - "region": "b1100000000?????????0010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank3", - "region": "b1100000000?????????0011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank4", - "region": "b1100000000?????????0100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank5", - "region": "b1100000000?????????0101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank6", - "region": "b1100000000?????????0110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank7", - "region": "b1100000000?????????0111?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - 
"name": "sramBank8", - "region": "b1100000000?????????1000?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank9", - "region": "b1100000000?????????1001?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank10", - "region": "b1100000000?????????1010?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank11", - "region": "b1100000000?????????1011?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank12", - "region": "b1100000000?????????1100?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank13", - "region": "b1100000000?????????1101?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank14", - "region": "b1100000000?????????1110?????????", - "beatbyte": 8, - "accessScalar": false - }, - { - "name": "sramBank15", - "region": "b1100000000?????????1111?????????", - "beatbyte": 8, - "accessScalar": false - } - ], "vrfBankSize": 4, "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rw", "vfuInstantiateParameter": { @@ -278,7 +150,8 @@ ] ] ], - "floatModuleParameters": [] + "floatModuleParameters": [], + "zvbbModuleParameters": [] } }, "generator": "org.chipsalliance.t1.rtl.T1" diff --git a/configgen/src/Main.scala b/configgen/src/Main.scala index c48760fef..88e3bc326 100644 --- a/configgen/src/Main.scala +++ b/configgen/src/Main.scala @@ -99,7 +99,59 @@ object Main { ), Seq(0, 1, 2, 3))), floatModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() + ) + ) + if (doEmit) param.emit(targetFile) + param + } + + // DLEN256 VLEN256; FP; VRF p0rw,p1rw bank1; LSU bank8 beatbyte 8; Zvbb + @main def psyduck( + @arg(name = "target-file", short = 't') targetFile: os.Path, + @arg(name = "emit", short = 'e', doc = "emit config") doEmit: 
Boolean = true + ): T1Parameter = { + val vLen = 512 + val dLen = 256 + val param = T1Parameter( + vLen, + dLen, + extensions = Seq("Zve32f", "Zvbb"), + t1customInstructions = Nil, + vrfBankSize = 1, + vrfRamType = RamType.p0rwp1rw, + vfuInstantiateParameter = VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneZvbb], LaneZvbbParam(32, 3)), Seq(0, 1, 2, 3))) ) ) if (doEmit) param.emit(targetFile) @@ -148,7 +200,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) @@ 
-197,7 +250,8 @@ object Main { OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() // TODO ) ) if (doEmit) param.emit(targetFile) diff --git a/t1/src/LaneZvbb.scala b/t1/src/LaneZvbb.scala new file mode 100644 index 000000000..a438f363c --- /dev/null +++ b/t1/src/LaneZvbb.scala @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl + +import chisel3.experimental.hierarchy.instantiable +import chisel3._ +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +import org.chipsalliance.t1.rtl.decoder.{BoolField, Decoder} + +object LaneZvbbParam { + implicit def rw: upickle.default.ReadWriter[LaneZvbbParam] = upickle.default.macroRW +} + +case class LaneZvbbParam(datapathWidth: Int, latency: Int) extends VFUParameter with SerializableModuleParameter { + val inputBundle = new LaneZvbbRequest(datapathWidth) + val decodeField: BoolField = Decoder.zvbb + val outputBundle = new LaneZvbbResponse(datapathWidth) + override val NeedSplit: Boolean = false +} + +class LaneZvbbRequest(datapathWidth: Int) extends VFUPipeBundle { + val src = Vec(3, UInt(datapathWidth.W)) + val opcode = UInt(4.W) + val vSew = UInt(2.W) + val shifterSize = UInt(log2Ceil(datapathWidth).W) +} + +class LaneZvbbResponse(datapathWidth: Int) extends VFUPipeBundle { + val data = UInt(datapathWidth.W) +} + +@instantiable +class LaneZvbb(val parameter: LaneZvbbParam) + extends VFUModule(parameter) with SerializableModule[LaneZvbbParam]{ + val response: LaneZvbbResponse = Wire(new LaneZvbbResponse(parameter.datapathWidth)) + val request : LaneZvbbRequest = connectIO(response).asTypeOf(parameter.inputBundle) + + val zvbbSrc: UInt = request.src(1) // vs2 + val zvbbRs: UInt = request.src(0) // vs1 or rs1 + val vSew: UInt = 
UIntToOH(request.vSew) // sew = 0, 1, 2 + + val zvbbBRev = VecInit(zvbbSrc.asBools.reverse).asUInt // element's bit reverse + val zvbbBRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s.reverse)).toSeq).asUInt // byte's bit reverse + val zvbbRev8 = VecInit(zvbbSrc.asBools.grouped(8).map(s => VecInit(s)).toSeq.reverse).asUInt // element's byte reverse + + val zvbbSrc16a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbSrc16b = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbSrc8a = zvbbSrc(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbSrc8b = zvbbSrc(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbSrc8c = zvbbSrc(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbSrc8d = zvbbSrc(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zvbbRs16a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-16) + val zvbbRs16b = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-32) + val zvbbRs8a = zvbbRs(parameter.datapathWidth-1, parameter.datapathWidth-8) + val zvbbRs8b = zvbbRs(parameter.datapathWidth-9, parameter.datapathWidth-16) + val zvbbRs8c = zvbbRs(parameter.datapathWidth-17, parameter.datapathWidth-24) + val zvbbRs8d = zvbbRs(parameter.datapathWidth-25, parameter.datapathWidth-32) + + val zero32: UInt = 0.U(32.W) + val zero16: UInt = 0.U(16.W) + val zero10: UInt = 0.U(11.W) + val zero8: UInt = 0.U(8.W) + val zero3: UInt = 0.U(4.W) + + val zvbbCLZ32: UInt = (32.U - PopCount(scanRightOr(zvbbSrc))).asUInt + val zvbbCLZ16: UInt = { + val clz16a: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16a))).asUInt(4, 0) + val clz16b: UInt = (16.U - PopCount(scanRightOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## clz16a ## zero10 ## clz16b + } + val zvbbCLZ8: UInt = { + val clz8a: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8a))).asUInt(3, 0) + val clz8b: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8b))).asUInt(3, 0) + val clz8c: UInt = (8.U - 
PopCount(scanRightOr(zvbbSrc8c))).asUInt(3, 0) + val clz8d: UInt = (8.U - PopCount(scanRightOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## clz8a ## zero3 ## clz8b ## zero3 ## clz8c ## zero3 ## clz8d + } + val zvbbCLZ: UInt = Mux1H(vSew, Seq( + zvbbCLZ8, + zvbbCLZ16, + zvbbCLZ32, + )) + + val zvbbCTZ32 = (32.U - PopCount(scanLeftOr(zvbbSrc))).asUInt + val zvbbCTZ16: UInt = { + val ctz16a: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16a))).asUInt(4, 0) + val ctz16b: UInt = (16.U - PopCount(scanLeftOr(zvbbSrc16b))).asUInt(4, 0) + zero10 ## ctz16a ## zero10 ## ctz16b + } + val zvbbCTZ8: UInt = { + val ctz8a: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8a))).asUInt(3, 0) + val ctz8b: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8b))).asUInt(3, 0) + val ctz8c: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8c))).asUInt(3, 0) + val ctz8d: UInt = (8.U - PopCount(scanLeftOr(zvbbSrc8d))).asUInt(3, 0) + zero3 ## ctz8a ## zero3 ## ctz8b ## zero3 ## ctz8c ## zero3 ## ctz8d + } + val zvbbCTZ = Mux1H(vSew, Seq( + zvbbCTZ8, + zvbbCTZ16, + zvbbCTZ32, + )) + + val zvbbROL32 = zvbbSrc.rotateLeft(zvbbRs(4, 0)).asUInt + val zvbbROL16: UInt = { + val rol16a = zvbbSrc16a.rotateLeft(zvbbRs16a(3, 0)).asUInt(15, 0) + val rol16b = zvbbSrc16b.rotateLeft(zvbbRs16b(3, 0)).asUInt(15, 0) + rol16a ## rol16b + } + val zvbbROL8: UInt = { + val rol8a = zvbbSrc8a.rotateLeft(zvbbRs8a(2, 0)).asUInt(7, 0) + val rol8b = zvbbSrc8b.rotateLeft(zvbbRs8b(2, 0)).asUInt(7, 0) + val rol8c = zvbbSrc8c.rotateLeft(zvbbRs8c(2, 0)).asUInt(7, 0) + val rol8d = zvbbSrc8d.rotateLeft(zvbbRs8d(2, 0)).asUInt(7, 0) + rol8a ## rol8b ## rol8c ## rol8d + } + val zvbbROL = Mux1H(vSew, Seq( + zvbbROL8, + zvbbROL16, + zvbbROL32, + )) + + val zvbbROR32 = zvbbSrc.rotateRight(zvbbRs(4, 0)).asUInt + val zvbbROR16: UInt = { + val ror16a = zvbbSrc16a.rotateRight(zvbbRs16a(3, 0)).asUInt(15, 0) + val ror16b = zvbbSrc16b.rotateRight(zvbbRs16b(3, 0)).asUInt(15, 0) + ror16a ## ror16b + } + val zvbbROR8: UInt = { + val ror8a = zvbbSrc8a.rotateRight(zvbbRs8a(2, 
0)).asUInt(7, 0) + val ror8b = zvbbSrc8b.rotateRight(zvbbRs8b(2, 0)).asUInt(7, 0) + val ror8c = zvbbSrc8c.rotateRight(zvbbRs8c(2, 0)).asUInt(7, 0) + val ror8d = zvbbSrc8d.rotateRight(zvbbRs8d(2, 0)).asUInt(7, 0) + ror8a ## ror8b ## ror8c ## ror8d + } + val zvbbROR = Mux1H(vSew, Seq( + zvbbROR8, + zvbbROR16, + zvbbROR32, + )) + + val zvbbSLL64_32 = ((zero32 ## zvbbSrc).asUInt << zvbbRs(4, 0)).asUInt(31, 0) + val zvbbSLL64_16: UInt = { + val sll64_16a = ((zero16 ## zvbbSrc16a).asUInt << zvbbRs16a(3, 0)).asUInt(15, 0) + val sll64_16b = ((zero16 ## zvbbSrc16b).asUInt << zvbbRs16b(3, 0)).asUInt(15, 0) + sll64_16a ## sll64_16b + } + val zvbbSLL64_8: UInt = { + val sll64_8a = ((zero8 ## zvbbSrc8a).asUInt << zvbbRs8a(2, 0)).asUInt(7, 0) + val sll64_8b = ((zero8 ## zvbbSrc8b).asUInt << zvbbRs8b(2, 0)).asUInt(7, 0) + val sll64_8c = ((zero8 ## zvbbSrc8c).asUInt << zvbbRs8c(2, 0)).asUInt(7, 0) + val sll64_8d = ((zero8 ## zvbbSrc8d).asUInt << zvbbRs8d(2, 0)).asUInt(7, 0) + sll64_8a ## sll64_8b ## sll64_8c ## sll64_8d + } + val zvbbSLL64 = Mux1H(vSew, Seq( + zvbbSLL64_8, + zvbbSLL64_16, + zvbbSLL64_32, + )) + val zvbbSLL = zvbbSLL64(parameter.datapathWidth-1, 0) + + val zvbbANDN = zvbbSrc & (~zvbbRs) + + response.data := Mux1H(UIntToOH(request.opcode), Seq( + zvbbBRev, + zvbbBRev8, + zvbbRev8, + zvbbCLZ, + zvbbCTZ, + zvbbROL, + zvbbROR, + zvbbSLL, + zvbbANDN, + )) +} + diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 4dca51f4e..1395dd89b 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -118,7 +118,12 @@ case class T1Parameter( val allInstructions: Seq[Instruction] = { org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) - .filter(instruction => instruction.instructionSet.name == "rv_v")++ + .filter{ + instruction => instruction.instructionSet.name match { + case "rv_v" => true + case "rv_zvbb" => if (zvbbEnable) true else false + case _ => false + }} ++ t1customInstructions.map(_.instruction) 
}.toSeq.sortBy(_.instructionSet.name).filter{ insn => insn.name match { @@ -127,7 +132,7 @@ case class T1Parameter( } } - require(extensions.forall(Seq("Zve32x", "Zve32f").contains), "unsupported extension.") + require(extensions.forall(Seq("Zve32x", "Zve32f", "Zvbb").contains), "unsupported extension.") // TODO: require bank not overlap /** xLen of T1, we currently only support 32. */ val xLen: Int = 32 @@ -144,6 +149,9 @@ case class T1Parameter( /** does t1 has floating datapath? */ val fpuEnable: Boolean = extensions.contains("Zve32f") + /** support of zvbb */ + lazy val zvbbEnable: Boolean = extensions.contains("Zvbb") + /** how many chaining does T1 support, this is not a parameter yet. */ val chainingSize: Int = 4 @@ -217,7 +225,7 @@ case class T1Parameter( // and the values are their respective delays. val crossLaneConnectCycles: Seq[Seq[Int]] = Seq.tabulate(laneNumber)(_ => Seq(1, 1)) - val decoderParam: DecoderParam = DecoderParam(fpuEnable, allInstructions) + val decoderParam: DecoderParam = DecoderParam(fpuEnable, zvbbEnable, allInstructions) /** paraemter for AXI4. 
*/ val axi4BundleParameter: AXI4BundleParameter = AXI4BundleParameter( diff --git a/t1/src/VectorFunctionUnit.scala b/t1/src/VectorFunctionUnit.scala index 25ff98a49..cf06a66af 100644 --- a/t1/src/VectorFunctionUnit.scala +++ b/t1/src/VectorFunctionUnit.scala @@ -105,7 +105,8 @@ case class VFUInstantiateParameter( divModuleParameters: Seq[(SerializableModuleGenerator[LaneDiv, LaneDivParam], Seq[Int])], divfpModuleParameters: Seq[(SerializableModuleGenerator[LaneDivFP, LaneDivFPParam], Seq[Int])], otherModuleParameters: Seq[(SerializableModuleGenerator[OtherUnit, OtherUnitParam], Seq[Int])], - floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])] + floatModuleParameters: Seq[(SerializableModuleGenerator[LaneFloat, LaneFloatParam], Seq[Int])], + zvbbModuleParameters: Seq[(SerializableModuleGenerator[LaneZvbb, LaneZvbbParam], Seq[Int])] ) { val genVec: Seq[(SerializableModuleGenerator[_ <: VFUModule, _ <: VFUParameter], Seq[Int])] = logicModuleParameters ++ @@ -115,7 +116,8 @@ case class VFUInstantiateParameter( divModuleParameters ++ divfpModuleParameters ++ otherModuleParameters ++ - floatModuleParameters + floatModuleParameters ++ + zvbbModuleParameters genVec.foreach { case (_, connect) => connect.foreach(connectIndex => require(connectIndex < slotCount)) diff --git a/t1/src/decoder/Decoder.scala b/t1/src/decoder/Decoder.scala index 3ebe07df3..3a0299389 100644 --- a/t1/src/decoder/Decoder.scala +++ b/t1/src/decoder/Decoder.scala @@ -13,7 +13,7 @@ import org.chipsalliance.t1.rtl.decoder.attribute._ object DecoderParam { implicit def rwP: upickle.default.ReadWriter[DecoderParam] = upickle.default.macroRW } -case class DecoderParam(fpuEnable: Boolean, allInstructions: Seq[Instruction]) +case class DecoderParam(fpuEnable: Boolean, zvbbEnable: Boolean, allInstructions: Seq[Instruction]) trait T1DecodeFiled[D <: Data] extends DecodeField[T1DecodePattern, D] with FieldName @@ -221,6 +221,10 @@ object Decoder { override def 
getTriState(pattern: T1DecodePattern): TriState = pattern.isOrderreduce.value } + object zvbb extends BoolField { + override def getTriState(pattern: T1DecodePattern): TriState = pattern.isZvbb.value + } + object topUop extends T1TopUopField { override def genTable(pattern: T1DecodePattern): BitPat = pattern.topUop.value match { case _: TopT0.type => BitPat("b000") @@ -328,6 +332,19 @@ object Decoder { case _: zeroUop0.type => BitPat("b0000") case _ => BitPat.dontCare(4) } + case zvbbCase: ZvbbUOPType => + zvbbCase match { + case _: zvbbUop0.type => BitPat("b0000") // brev + case _: zvbbUop1.type => BitPat("b0001") // brev8 + case _: zvbbUop2.type => BitPat("b0010") // rev8 + case _: zvbbUop3.type => BitPat("b0011") // clz + case _: zvbbUop4.type => BitPat("b0100") // ctz + case _: zvbbUop5.type => BitPat("b0101") // rol + case _: zvbbUop6.type => BitPat("b0110") // ror + case _: zvbbUop7.type => BitPat("b0111") // wsll + case _: zvbbUop8.type => BitPat("b1000") // andn + case _ => BitPat.dontCare(4) + } case _ => BitPat.dontCare(4) } } @@ -399,6 +416,12 @@ object Decoder { orderReduce ) else Seq() + } ++ { + if (param.zvbbEnable) + Seq( + zvbb, + ) + else Seq() } def allDecodePattern(param: DecoderParam): Seq[T1DecodePattern] = param.allInstructions.map(T1DecodePattern(_, param)).toSeq.sortBy(_.instruction.name) diff --git a/t1/src/decoder/InstructionDocumentation.scala b/t1/src/decoder/InstructionDocumentation.scala index 22cf95823..86c5a7e35 100644 --- a/t1/src/decoder/InstructionDocumentation.scala +++ b/t1/src/decoder/InstructionDocumentation.scala @@ -422,5 +422,22 @@ case class InstructionDocumentation(instruction: Instruction, param: DecoderPara case "vzext.vf2" => "TODO!" case "vzext.vf4" => "TODO!" case "vzext.vf8" => "TODO!" + // rv_zvbb + case "vandn.vv" => "TODO!" + case "vandn.vx" => "TODO!" + case "vbrev.v" => "TODO!" + case "vbrev8.v" => "TODO!" + case "vrev8.v" => "TODO!" + case "vclz.v" => "TODO!" + case "vctz.v" => "TODO!" 
+ case "vcpop.v" => "TODO!" + case "vrol.vv" => "TODO!" + case "vrol.vx" => "TODO!" + case "vror.vv" => "TODO!" + case "vror.vx" => "TODO!" + case "vror.vi" => "TODO!" + case "vwsll.vv" => "TODO!" + case "vwsll.vx" => "TODO!" + case "vwsll.vi" => "TODO!" } } diff --git a/t1/src/decoder/T1DecodePattern.scala b/t1/src/decoder/T1DecodePattern.scala index d1bb84930..5c7d10733 100644 --- a/t1/src/decoder/T1DecodePattern.scala +++ b/t1/src/decoder/T1DecodePattern.scala @@ -107,6 +107,7 @@ case class T1DecodePattern(instruction: Instruction, param: DecoderParam) extend def isVtype: isVtype = attribute.isVtype(this) def isVwmacc: isVwmacc = attribute.isVwmacc(this) def isWidenreduce: isWidenreduce = attribute.isWidenreduce(this) + def isZvbb: isZvbb = attribute.isZvbb(this) def fpExecutionType: FpExecutionType.Type = attribute.FpExecutionType(this) def topUop: TopUop = attribute.TopUop(this) def decoderUop: DecoderUop = attribute.DecoderUop(this) diff --git a/t1/src/decoder/attribute/isCrosswrite.scala b/t1/src/decoder/attribute/isCrosswrite.scala index cbe920dbb..bddbc3818 100644 --- a/t1/src/decoder/attribute/isCrosswrite.scala +++ b/t1/src/decoder/attribute/isCrosswrite.scala @@ -46,6 +46,10 @@ object isCrosswrite { "vwsubu.vx", "vwsubu.wv", "vwsubu.wx", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isItype.scala b/t1/src/decoder/attribute/isItype.scala index aafc0641c..5ba9baf2e 100644 --- a/t1/src/decoder/attribute/isItype.scala +++ b/t1/src/decoder/attribute/isItype.scala @@ -51,6 +51,9 @@ object isItype { "vssra.vi", "vssrl.vi", "vxor.vi", + // rv_zvbb + "vror.vi", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isPopcount.scala b/t1/src/decoder/attribute/isPopcount.scala index 0137b77b0..3a949c436 100644 --- a/t1/src/decoder/attribute/isPopcount.scala +++ b/t1/src/decoder/attribute/isPopcount.scala @@ 
-18,6 +18,7 @@ object isPopcount { def y(t1DecodePattern: T1DecodePattern): Boolean = { val allMatched = Seq( "vcpop.m", + "vcpop.v", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isScheduler.scala b/t1/src/decoder/attribute/isScheduler.scala index 229c45575..423b59a35 100644 --- a/t1/src/decoder/attribute/isScheduler.scala +++ b/t1/src/decoder/attribute/isScheduler.scala @@ -274,5 +274,5 @@ object isScheduler { } case class isScheduler(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. */ " + override val description: String = "lane will send request to Sequencer and wait ack from Sequencer. Instructions that will communicate with T1 top module.*/ " } diff --git a/t1/src/decoder/attribute/isSreadvd.scala b/t1/src/decoder/attribute/isSreadvd.scala index bf9fc6837..e6fa9bb76 100644 --- a/t1/src/decoder/attribute/isSreadvd.scala +++ b/t1/src/decoder/attribute/isSreadvd.scala @@ -307,5 +307,5 @@ object isSreadvd { } case class isSreadvd(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sReadVD -> !(ma || maskLogic) instruction need to read vd as operator. " + override val description: String = "sReadVD -> !(ma || maskLogic): instructions that need to read vd as the operator. 
" } diff --git a/t1/src/decoder/attribute/isSwrite.scala b/t1/src/decoder/attribute/isSwrite.scala index cfddf2e04..f16f28e5d 100644 --- a/t1/src/decoder/attribute/isSwrite.scala +++ b/t1/src/decoder/attribute/isSwrite.scala @@ -210,6 +210,10 @@ object isSwrite { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } @@ -224,5 +228,5 @@ object isSwrite { } case class isSwrite(value: TriState) extends BooleanDecodeAttribute { - override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. " + override val description: String = "sWrite -> targetRd || readOnly || crossWrite || maskDestination || reduce || loadStore instruction will write vd or rd(scalar) from outside of lane. It will request vrf wait, and lane will not write. No write to vd when isSwrite is True!!!" 
} diff --git a/t1/src/decoder/attribute/isUnsigned0.scala b/t1/src/decoder/attribute/isUnsigned0.scala index c180180bd..fb041c3c7 100644 --- a/t1/src/decoder/attribute/isUnsigned0.scala +++ b/t1/src/decoder/attribute/isUnsigned0.scala @@ -130,6 +130,22 @@ object isUnsigned0 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isUnsigned1.scala b/t1/src/decoder/attribute/isUnsigned1.scala index 1f71f2310..cf4f517a0 100644 --- a/t1/src/decoder/attribute/isUnsigned1.scala +++ b/t1/src/decoder/attribute/isUnsigned1.scala @@ -102,6 +102,22 @@ object isUnsigned1 { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isVtype.scala b/t1/src/decoder/attribute/isVtype.scala index 605588b08..7649d715a 100644 --- a/t1/src/decoder/attribute/isVtype.scala +++ b/t1/src/decoder/attribute/isVtype.scala @@ -181,6 +181,11 @@ object isVtype { "vzext.vf2", "vzext.vf4", "vzext.vf8", + // rv_zvbb + "vandn.vv", + "vrol.vv", + "vror.vv", + "vwsll.vv", ) allMatched.contains(t1DecodePattern.instruction.name) } diff --git a/t1/src/decoder/attribute/isZvbb.scala b/t1/src/decoder/attribute/isZvbb.scala new file mode 100644 index 000000000..c5735aaf9 --- /dev/null +++ b/t1/src/decoder/attribute/isZvbb.scala @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import 
org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +object isZvbb { + def apply(t1DecodePattern: T1DecodePattern): isZvbb = + Seq( + y _ -> Y, + n _ -> N, + dc _ -> DC + ).collectFirst { + case (fn, tri) if fn(t1DecodePattern) => isZvbb(tri) + }.get + + def y(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = if(t1DecodePattern.param.zvbbEnable) Seq( + "vandn.vv", + "vandn.vx", + "vbrev.v", + "vbrev8.v", + "vrev8.v", + "vclz.v", + "vctz.v", + "vrol.vv", + "vrol.vx", + "vror.vv", + "vror.vx", + "vror.vi", + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", + ) else Seq() + allMatched.contains(t1DecodePattern.instruction.name) + } + def n(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched = t1DecodePattern.param.allInstructions.filter(i => + !(y(t1DecodePattern) || dc(t1DecodePattern)) + ) + allMatched.contains(t1DecodePattern.instruction) + } + + def dc(t1DecodePattern: T1DecodePattern): Boolean = false +} + +case class isZvbb(value: TriState) extends BooleanDecodeAttribute { + override val description: String = "goes to [[org.chipsalliance.t1.rtl.LaneZvbb]]." 
+} diff --git a/t1/src/decoder/attribute/uop.scala b/t1/src/decoder/attribute/uop.scala index 66d8dbf02..97d49365c 100644 --- a/t1/src/decoder/attribute/uop.scala +++ b/t1/src/decoder/attribute/uop.scala @@ -15,7 +15,8 @@ object DecoderUop { isLogic.y(t1DecodePattern) -> LogicUop(t1DecodePattern), isShift.y(t1DecodePattern) -> ShiftUop(t1DecodePattern), isOther.y(t1DecodePattern) -> OtherUop(t1DecodePattern), - isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern) + isZero.y(t1DecodePattern) -> ZeroUOP(t1DecodePattern), + isZvbb.y(t1DecodePattern) -> ZvbbUOP(t1DecodePattern), ).collectFirst { case (fn, tpe) if fn => DecoderUop(tpe) } diff --git a/t1/src/decoder/attribute/zvbbUop.scala b/t1/src/decoder/attribute/zvbbUop.scala new file mode 100644 index 000000000..06524e95b --- /dev/null +++ b/t1/src/decoder/attribute/zvbbUop.scala @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rtl.decoder.attribute + +import org.chipsalliance.t1.rtl.decoder.T1DecodePattern + +trait ZvbbUOPType extends Uop +object zvbbUop0 extends ZvbbUOPType // brev +object zvbbUop1 extends ZvbbUOPType // brev8 +object zvbbUop2 extends ZvbbUOPType // rev8 +object zvbbUop3 extends ZvbbUOPType // clz +object zvbbUop4 extends ZvbbUOPType // ctz +object zvbbUop5 extends ZvbbUOPType // rol +object zvbbUop6 extends ZvbbUOPType // ror +object zvbbUop7 extends ZvbbUOPType // wsll +object zvbbUop8 extends ZvbbUOPType // andn + +object ZvbbUOP { + def apply(t1DecodePattern: T1DecodePattern): Uop = { + Seq( + t0 _ -> zvbbUop0, + t1 _ -> zvbbUop1, + t2 _ -> zvbbUop2, + t3 _ -> zvbbUop3, + t4 _ -> zvbbUop4, + t5 _ -> zvbbUop5, + t6 _ -> zvbbUop6, + t7 _ -> zvbbUop7, + t8 _ -> zvbbUop8, + ).collectFirst { + case (fn, tpe) if fn(t1DecodePattern) => tpe + }.getOrElse(UopDC) + } + def t0(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev.v" + ) + 
allMatched.contains(t1DecodePattern.instruction.name) + } + def t1(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vbrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t2(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrev8.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t3(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vclz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t4(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vctz.v" + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t5(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vrol.vv", + "vrol.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t6(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vror.vv", + "vror.vx", + "vror.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t7(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vwsll.vv", + "vwsll.vx", + "vwsll.vi", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } + def t8(t1DecodePattern: T1DecodePattern): Boolean = { + val allMatched: Seq[String] = Seq( + "vandn.vv", + "vandn.vx", + ) + allMatched.contains(t1DecodePattern.instruction.name) + } +} From d8d0ff579b2a066184f27de401d29b85bfc4c062 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 13:37:05 +0800 Subject: [PATCH 003/140] [nix] bump rvv-codegen Signed-off-by: Avimitin --- nix/pkgs/rvv-codegen.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/pkgs/rvv-codegen.nix b/nix/pkgs/rvv-codegen.nix index 4e6d3ed7b..bffddec26 100644 --- a/nix/pkgs/rvv-codegen.nix +++ b/nix/pkgs/rvv-codegen.nix @@ -11,10 +11,10 @@ buildGoModule { pname = 
"riscv-vector-test"; version = "unstable-2023-04-12"; src = fetchFromGitHub { - owner = "ksco"; + owner = "chipsalliance"; repo = "riscv-vector-tests"; - rev = "bafa717d37b9bef3e80b66a50b01c22f532306bc"; - hash = "sha256-C91HUDyMykS3qM9h+rJ2uKAJcKHkoakw9I+wwtco0m8="; + rev = "caae5c8fcf465be73266f9b3bd672f71a362548e"; + hash = "sha256-388MKOO+g4PjR3BcxiA8vNY7itDcIhz88vZmMZkbsj8="; }; doCheck = false; vendorHash = "sha256-9cQlivpHg6IDYpmgBp34n6BR/I0FIYnmrXCuiGmAhNE="; From 420460ab6a54cdff4231829e9789a661a1e228d3 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 20:38:20 +0800 Subject: [PATCH 004/140] [ci] add psyduck into CI Signed-off-by: Avimitin --- .github/cases/psyduck/default.json | 529 +++++++++++++++++++++++++++++ .github/cases/psyduck/perf.json | 9 + tests/codegen/common.txt | 154 +++++---- 3 files changed, 623 insertions(+), 69 deletions(-) create mode 100644 .github/cases/psyduck/default.json create mode 100644 .github/cases/psyduck/perf.json diff --git a/.github/cases/psyduck/default.json b/.github/cases/psyduck/default.json new file mode 100644 index 000000000..460e5e84f --- /dev/null +++ b/.github/cases/psyduck/default.json @@ -0,0 +1,529 @@ +{ + "asm.mmm": 0, + "asm.smoke": 0, + "codegen.vaadd_vv": 0, + "codegen.vaadd_vx": 0, + "codegen.vaaddu_vv": 0, + "codegen.vaaddu_vx": 0, + "codegen.vadc_vim": 0, + "codegen.vadc_vvm": 0, + "codegen.vadc_vxm": 0, + "codegen.vadd_vi": 0, + "codegen.vadd_vv": 0, + "codegen.vadd_vx": 0, + "codegen.vand_vi": 0, + "codegen.vand_vv": 0, + "codegen.vand_vx": 0, + "codegen.vandn_vv": 0, + "codegen.vandn_vx": 0, + "codegen.vasub_vv": 0, + "codegen.vasub_vx": 0, + "codegen.vasubu_vv": 0, + "codegen.vasubu_vx": 0, + "codegen.vbrev8_v": 0, + "codegen.vbrev_v": 0, + "codegen.vclz_v": 0, + "codegen.vcompress_vm": 0, + "codegen.vcpop_m": 0, + "codegen.vcpop_v": 0, + "codegen.vctz_v": 0, + "codegen.vdiv_vv": 0, + "codegen.vdiv_vx": 0, + "codegen.vdivu_vv": 0, + "codegen.vdivu_vx": 0, + "codegen.vfadd_vf": 0, + 
"codegen.vfadd_vv": 0, + "codegen.vfclass_v": 0, + "codegen.vfcvt_f_x_v": 0, + "codegen.vfcvt_f_xu_v": 0, + "codegen.vfcvt_rtz_x_f_v": 0, + "codegen.vfcvt_rtz_xu_f_v": 0, + "codegen.vfcvt_x_f_v": 0, + "codegen.vfcvt_xu_f_v": 0, + "codegen.vfdiv_vf": 0, + "codegen.vfdiv_vv": 0, + "codegen.vfirst_m": 0, + "codegen.vfmacc_vf": 0, + "codegen.vfmacc_vv": 0, + "codegen.vfmadd_vf": 0, + "codegen.vfmadd_vv": 0, + "codegen.vfmax_vf": 0, + "codegen.vfmax_vv": 0, + "codegen.vfmerge_vfm": 0, + "codegen.vfmin_vf": 0, + "codegen.vfmin_vv": 0, + "codegen.vfmsac_vf": 0, + "codegen.vfmsac_vv": 0, + "codegen.vfmsub_vf": 0, + "codegen.vfmsub_vv": 0, + "codegen.vfmul_vf": 0, + "codegen.vfmul_vv": 0, + "codegen.vfmv_f_s": 0, + "codegen.vfmv_s_f": 0, + "codegen.vfmv_v_f": 0, + "codegen.vfnmacc_vf": 0, + "codegen.vfnmacc_vv": 0, + "codegen.vfnmadd_vf": 0, + "codegen.vfnmadd_vv": 0, + "codegen.vfnmsac_vf": 0, + "codegen.vfnmsac_vv": 0, + "codegen.vfnmsub_vf": 0, + "codegen.vfnmsub_vv": 0, + "codegen.vfrdiv_vf": 0, + "codegen.vfrec7_v": 0, + "codegen.vfredmax_vs": 0, + "codegen.vfredmin_vs": 0, + "codegen.vfredosum_vs": 0, + "codegen.vfredusum_vs": 0, + "codegen.vfrsqrt7_v": 0, + "codegen.vfrsub_vf": 0, + "codegen.vfsgnj_vf": 0, + "codegen.vfsgnj_vv": 0, + "codegen.vfsgnjn_vf": 0, + "codegen.vfsgnjn_vv": 0, + "codegen.vfsgnjx_vf": 0, + "codegen.vfsgnjx_vv": 0, + "codegen.vfsqrt_v": 0, + "codegen.vfsub_vf": 0, + "codegen.vfsub_vv": 0, + "codegen.vid_v": 0, + "codegen.viota_m": 0, + "codegen.vl1re16_v": 0, + "codegen.vl1re32_v": 0, + "codegen.vl1re8_v": 0, + "codegen.vl2re16_v": 0, + "codegen.vl2re32_v": 0, + "codegen.vl2re8_v": 0, + "codegen.vl4re16_v": 0, + "codegen.vl4re32_v": 0, + "codegen.vl4re8_v": 0, + "codegen.vl8re16_v": 0, + "codegen.vl8re32_v": 0, + "codegen.vl8re8_v": 0, + "codegen.vle16_v": 0, + "codegen.vle16ff_v": 0, + "codegen.vle32_v": 0, + "codegen.vle32ff_v": 0, + "codegen.vle8_v": 0, + "codegen.vle8ff_v": 0, + "codegen.vlm_v": 0, + "codegen.vloxei16_v": 0, + 
"codegen.vloxei32_v": 0, + "codegen.vloxei8_v": 0, + "codegen.vloxseg2ei16_v": 0, + "codegen.vloxseg2ei32_v": 0, + "codegen.vloxseg2ei8_v": 0, + "codegen.vloxseg3ei16_v": 0, + "codegen.vloxseg3ei32_v": 0, + "codegen.vloxseg3ei8_v": 0, + "codegen.vloxseg4ei16_v": 0, + "codegen.vloxseg4ei32_v": 0, + "codegen.vloxseg4ei8_v": 0, + "codegen.vloxseg5ei16_v": 0, + "codegen.vloxseg5ei32_v": 0, + "codegen.vloxseg5ei8_v": 0, + "codegen.vloxseg6ei16_v": 0, + "codegen.vloxseg6ei32_v": 0, + "codegen.vloxseg6ei8_v": 0, + "codegen.vloxseg7ei16_v": 0, + "codegen.vloxseg7ei32_v": 0, + "codegen.vloxseg7ei8_v": 0, + "codegen.vloxseg8ei16_v": 0, + "codegen.vloxseg8ei32_v": 0, + "codegen.vloxseg8ei8_v": 0, + "codegen.vlse16_v": 0, + "codegen.vlse32_v": 0, + "codegen.vlse8_v": 0, + "codegen.vlseg2e16_v": 0, + "codegen.vlseg2e32_v": 0, + "codegen.vlseg2e8_v": 0, + "codegen.vlseg3e16_v": 0, + "codegen.vlseg3e32_v": 0, + "codegen.vlseg3e8_v": 0, + "codegen.vlseg4e16_v": 0, + "codegen.vlseg4e32_v": 0, + "codegen.vlseg4e8_v": 0, + "codegen.vlseg5e16_v": 0, + "codegen.vlseg5e32_v": 0, + "codegen.vlseg5e8_v": 0, + "codegen.vlseg6e16_v": 0, + "codegen.vlseg6e32_v": 0, + "codegen.vlseg6e8_v": 0, + "codegen.vlseg7e16_v": 0, + "codegen.vlseg7e32_v": 0, + "codegen.vlseg7e8_v": 0, + "codegen.vlseg8e16_v": 0, + "codegen.vlseg8e32_v": 0, + "codegen.vlseg8e8_v": 0, + "codegen.vlsseg2e16_v": 0, + "codegen.vlsseg2e32_v": 0, + "codegen.vlsseg2e8_v": 0, + "codegen.vlsseg3e16_v": 0, + "codegen.vlsseg3e32_v": 0, + "codegen.vlsseg3e8_v": 0, + "codegen.vlsseg4e16_v": 0, + "codegen.vlsseg4e32_v": 0, + "codegen.vlsseg4e8_v": 0, + "codegen.vlsseg5e16_v": 0, + "codegen.vlsseg5e32_v": 0, + "codegen.vlsseg5e8_v": 0, + "codegen.vlsseg6e16_v": 0, + "codegen.vlsseg6e32_v": 0, + "codegen.vlsseg6e8_v": 0, + "codegen.vlsseg7e16_v": 0, + "codegen.vlsseg7e32_v": 0, + "codegen.vlsseg7e8_v": 0, + "codegen.vlsseg8e16_v": 0, + "codegen.vlsseg8e32_v": 0, + "codegen.vlsseg8e8_v": 0, + "codegen.vluxei16_v": 0, + 
"codegen.vluxei32_v": 0, + "codegen.vluxei8_v": 0, + "codegen.vluxseg2ei16_v": 0, + "codegen.vluxseg2ei32_v": 0, + "codegen.vluxseg2ei8_v": 0, + "codegen.vluxseg3ei16_v": 0, + "codegen.vluxseg3ei32_v": 0, + "codegen.vluxseg3ei8_v": 0, + "codegen.vluxseg4ei16_v": 0, + "codegen.vluxseg4ei32_v": 0, + "codegen.vluxseg4ei8_v": 0, + "codegen.vluxseg5ei16_v": 0, + "codegen.vluxseg5ei32_v": 0, + "codegen.vluxseg5ei8_v": 0, + "codegen.vluxseg6ei16_v": 0, + "codegen.vluxseg6ei32_v": 0, + "codegen.vluxseg6ei8_v": 0, + "codegen.vluxseg7ei16_v": 0, + "codegen.vluxseg7ei32_v": 0, + "codegen.vluxseg7ei8_v": 0, + "codegen.vluxseg8ei16_v": 0, + "codegen.vluxseg8ei32_v": 0, + "codegen.vluxseg8ei8_v": 0, + "codegen.vmacc_vv": 0, + "codegen.vmacc_vx": 0, + "codegen.vmadc_vi": 0, + "codegen.vmadc_vim": 0, + "codegen.vmadc_vv": 0, + "codegen.vmadc_vvm": 0, + "codegen.vmadc_vx": 0, + "codegen.vmadc_vxm": 0, + "codegen.vmadd_vv": 0, + "codegen.vmadd_vx": 0, + "codegen.vmand_mm": 0, + "codegen.vmandn_mm": 0, + "codegen.vmax_vv": 0, + "codegen.vmax_vx": 0, + "codegen.vmaxu_vv": 0, + "codegen.vmaxu_vx": 0, + "codegen.vmerge_vim": 0, + "codegen.vmerge_vvm": 0, + "codegen.vmerge_vxm": 0, + "codegen.vmfeq_vf": 0, + "codegen.vmfeq_vv": 0, + "codegen.vmfge_vf": 0, + "codegen.vmfgt_vf": 0, + "codegen.vmflt_vf": 0, + "codegen.vmflt_vv": 0, + "codegen.vmfne_vf": 0, + "codegen.vmfne_vv": 0, + "codegen.vmin_vv": 0, + "codegen.vmin_vx": 0, + "codegen.vminu_vv": 0, + "codegen.vminu_vx": 0, + "codegen.vmnand_mm": 0, + "codegen.vmnor_mm": 0, + "codegen.vmor_mm": 0, + "codegen.vmorn_mm": 0, + "codegen.vmsbc_vv": 0, + "codegen.vmsbc_vvm": 0, + "codegen.vmsbc_vx": 0, + "codegen.vmsbc_vxm": 0, + "codegen.vmsbf_m": 0, + "codegen.vmseq_vi": 0, + "codegen.vmseq_vv": 0, + "codegen.vmseq_vx": 0, + "codegen.vmsgt_vi": 0, + "codegen.vmsgt_vv": 0, + "codegen.vmsgt_vx": 0, + "codegen.vmsgtu_vi": 0, + "codegen.vmsgtu_vv": 0, + "codegen.vmsgtu_vx": 0, + "codegen.vmsif_m": 0, + "codegen.vmsle_vi": 0, + 
"codegen.vmsle_vv": 0, + "codegen.vmsle_vx": 0, + "codegen.vmsleu_vi": 0, + "codegen.vmsleu_vv": 0, + "codegen.vmsleu_vx": 0, + "codegen.vmslt_vv": 0, + "codegen.vmslt_vx": 0, + "codegen.vmsltu_vv": 0, + "codegen.vmsltu_vx": 0, + "codegen.vmsne_vi": 0, + "codegen.vmsne_vv": 0, + "codegen.vmsne_vx": 0, + "codegen.vmsof_m": 0, + "codegen.vmul_vv": 0, + "codegen.vmul_vx": 0, + "codegen.vmulh_vv": 0, + "codegen.vmulh_vx": 0, + "codegen.vmulhsu_vv": 0, + "codegen.vmulhsu_vx": 0, + "codegen.vmulhu_vv": 0, + "codegen.vmulhu_vx": 0, + "codegen.vmv1r_v": 0, + "codegen.vmv2r_v": 0, + "codegen.vmv4r_v": 0, + "codegen.vmv8r_v": 0, + "codegen.vmv_s_x": 0, + "codegen.vmv_v_i": 0, + "codegen.vmv_v_v": 0, + "codegen.vmv_v_x": 0, + "codegen.vmv_x_s": 0, + "codegen.vmxnor_mm": 0, + "codegen.vmxor_mm": 0, + "codegen.vnclip_wi": 0, + "codegen.vnclip_wv": 0, + "codegen.vnclip_wx": 0, + "codegen.vnclipu_wi": 0, + "codegen.vnclipu_wv": 0, + "codegen.vnclipu_wx": 0, + "codegen.vnmsac_vv": 0, + "codegen.vnmsac_vx": 0, + "codegen.vnmsub_vv": 0, + "codegen.vnmsub_vx": 0, + "codegen.vnsra_wi": 0, + "codegen.vnsra_wv": 0, + "codegen.vnsra_wx": 0, + "codegen.vnsrl_wi": 0, + "codegen.vnsrl_wv": 0, + "codegen.vnsrl_wx": 0, + "codegen.vor_vi": 0, + "codegen.vor_vv": 0, + "codegen.vor_vx": 0, + "codegen.vredand_vs": 0, + "codegen.vredmax_vs": 0, + "codegen.vredmaxu_vs": 0, + "codegen.vredmin_vs": 0, + "codegen.vredminu_vs": 0, + "codegen.vredor_vs": 0, + "codegen.vredsum_vs": 0, + "codegen.vredxor_vs": 0, + "codegen.vrem_vv": 0, + "codegen.vrem_vx": 0, + "codegen.vremu_vv": 0, + "codegen.vremu_vx": 0, + "codegen.vrev8_v": 0, + "codegen.vrgather_vi": 0, + "codegen.vrgather_vv": 0, + "codegen.vrgather_vx": 0, + "codegen.vrgatherei16_vv": 0, + "codegen.vrol_vv": 0, + "codegen.vrol_vx": 0, + "codegen.vror_vi": 0, + "codegen.vror_vv": 0, + "codegen.vror_vx": 0, + "codegen.vrsub_vi": 0, + "codegen.vrsub_vx": 0, + "codegen.vs1r_v": 0, + "codegen.vs2r_v": 0, + "codegen.vs4r_v": 0, + "codegen.vs8r_v": 0, + 
"codegen.vsadd_vi": 0, + "codegen.vsadd_vv": 0, + "codegen.vsadd_vx": 0, + "codegen.vsaddu_vi": 0, + "codegen.vsaddu_vv": 0, + "codegen.vsaddu_vx": 0, + "codegen.vsbc_vvm": 0, + "codegen.vsbc_vxm": 0, + "codegen.vse16_v": 0, + "codegen.vse32_v": 0, + "codegen.vse8_v": 0, + "codegen.vsetivli": 0, + "codegen.vsetvl": 0, + "codegen.vsetvli": 0, + "codegen.vsext_vf2": 0, + "codegen.vsext_vf4": 0, + "codegen.vslide1down_vx": 0, + "codegen.vslide1up_vx": 0, + "codegen.vslidedown_vi": 0, + "codegen.vslidedown_vx": 0, + "codegen.vslideup_vi": 0, + "codegen.vslideup_vx": 0, + "codegen.vsll_vi": 0, + "codegen.vsll_vv": 0, + "codegen.vsll_vx": 0, + "codegen.vsm_v": 0, + "codegen.vsmul_vv": 0, + "codegen.vsmul_vx": 0, + "codegen.vsoxei16_v": 0, + "codegen.vsoxei32_v": 0, + "codegen.vsoxei8_v": 0, + "codegen.vsoxseg2ei16_v": 0, + "codegen.vsoxseg2ei32_v": 0, + "codegen.vsoxseg2ei8_v": 0, + "codegen.vsoxseg3ei16_v": 0, + "codegen.vsoxseg3ei32_v": 0, + "codegen.vsoxseg3ei8_v": 0, + "codegen.vsoxseg4ei16_v": 0, + "codegen.vsoxseg4ei32_v": 0, + "codegen.vsoxseg4ei8_v": 0, + "codegen.vsoxseg5ei16_v": 0, + "codegen.vsoxseg5ei32_v": 0, + "codegen.vsoxseg5ei8_v": 0, + "codegen.vsoxseg6ei16_v": 0, + "codegen.vsoxseg6ei32_v": 0, + "codegen.vsoxseg6ei8_v": 0, + "codegen.vsoxseg7ei16_v": 0, + "codegen.vsoxseg7ei32_v": 0, + "codegen.vsoxseg7ei8_v": 0, + "codegen.vsoxseg8ei16_v": 0, + "codegen.vsoxseg8ei32_v": 0, + "codegen.vsoxseg8ei8_v": 0, + "codegen.vsra_vi": 0, + "codegen.vsra_vv": 0, + "codegen.vsra_vx": 0, + "codegen.vsrl_vi": 0, + "codegen.vsrl_vv": 0, + "codegen.vsrl_vx": 0, + "codegen.vsse16_v": 0, + "codegen.vsse32_v": 0, + "codegen.vsse8_v": 0, + "codegen.vsseg2e16_v": 0, + "codegen.vsseg2e32_v": 0, + "codegen.vsseg2e8_v": 0, + "codegen.vsseg3e16_v": 0, + "codegen.vsseg3e32_v": 0, + "codegen.vsseg3e8_v": 0, + "codegen.vsseg4e16_v": 0, + "codegen.vsseg4e32_v": 0, + "codegen.vsseg4e8_v": 0, + "codegen.vsseg5e16_v": 0, + "codegen.vsseg5e32_v": 0, + "codegen.vsseg5e8_v": 0, + 
"codegen.vsseg6e16_v": 0, + "codegen.vsseg6e32_v": 0, + "codegen.vsseg6e8_v": 0, + "codegen.vsseg7e16_v": 0, + "codegen.vsseg7e32_v": 0, + "codegen.vsseg7e8_v": 0, + "codegen.vsseg8e16_v": 0, + "codegen.vsseg8e32_v": 0, + "codegen.vsseg8e8_v": 0, + "codegen.vssra_vi": 0, + "codegen.vssra_vv": 0, + "codegen.vssra_vx": 0, + "codegen.vssrl_vi": 0, + "codegen.vssrl_vv": 0, + "codegen.vssrl_vx": 0, + "codegen.vssseg2e16_v": 0, + "codegen.vssseg2e32_v": 0, + "codegen.vssseg2e8_v": 0, + "codegen.vssseg3e16_v": 0, + "codegen.vssseg3e32_v": 0, + "codegen.vssseg3e8_v": 0, + "codegen.vssseg4e16_v": 0, + "codegen.vssseg4e32_v": 0, + "codegen.vssseg4e8_v": 0, + "codegen.vssseg5e16_v": 0, + "codegen.vssseg5e32_v": 0, + "codegen.vssseg5e8_v": 0, + "codegen.vssseg6e16_v": 0, + "codegen.vssseg6e32_v": 0, + "codegen.vssseg6e8_v": 0, + "codegen.vssseg7e16_v": 0, + "codegen.vssseg7e32_v": 0, + "codegen.vssseg7e8_v": 0, + "codegen.vssseg8e16_v": 0, + "codegen.vssseg8e32_v": 0, + "codegen.vssseg8e8_v": 0, + "codegen.vssub_vv": 0, + "codegen.vssub_vx": 0, + "codegen.vssubu_vv": 0, + "codegen.vssubu_vx": 0, + "codegen.vsub_vv": 0, + "codegen.vsub_vx": 0, + "codegen.vsuxei16_v": 0, + "codegen.vsuxei32_v": 0, + "codegen.vsuxei8_v": 0, + "codegen.vsuxseg2ei16_v": 0, + "codegen.vsuxseg2ei32_v": 0, + "codegen.vsuxseg2ei8_v": 0, + "codegen.vsuxseg3ei16_v": 0, + "codegen.vsuxseg3ei32_v": 0, + "codegen.vsuxseg3ei8_v": 0, + "codegen.vsuxseg4ei16_v": 0, + "codegen.vsuxseg4ei32_v": 0, + "codegen.vsuxseg4ei8_v": 0, + "codegen.vsuxseg5ei16_v": 0, + "codegen.vsuxseg5ei32_v": 0, + "codegen.vsuxseg5ei8_v": 0, + "codegen.vsuxseg6ei16_v": 0, + "codegen.vsuxseg6ei32_v": 0, + "codegen.vsuxseg6ei8_v": 0, + "codegen.vsuxseg7ei16_v": 0, + "codegen.vsuxseg7ei32_v": 0, + "codegen.vsuxseg7ei8_v": 0, + "codegen.vsuxseg8ei16_v": 0, + "codegen.vsuxseg8ei32_v": 0, + "codegen.vsuxseg8ei8_v": 0, + "codegen.vwadd_vv": 0, + "codegen.vwadd_vx": 0, + "codegen.vwadd_wv": 0, + "codegen.vwadd_wx": 0, + "codegen.vwaddu_vv": 0, 
+ "codegen.vwaddu_vx": 0, + "codegen.vwaddu_wv": 0, + "codegen.vwaddu_wx": 0, + "codegen.vwmacc_vv": 0, + "codegen.vwmacc_vx": 0, + "codegen.vwmaccsu_vv": 0, + "codegen.vwmaccsu_vx": 0, + "codegen.vwmaccu_vv": 0, + "codegen.vwmaccu_vx": 0, + "codegen.vwmaccus_vx": 0, + "codegen.vwmul_vv": 0, + "codegen.vwmul_vx": 0, + "codegen.vwmulsu_vv": 0, + "codegen.vwmulsu_vx": 0, + "codegen.vwmulu_vv": 0, + "codegen.vwmulu_vx": 0, + "codegen.vwredsum_vs": 0, + "codegen.vwredsumu_vs": 0, + "codegen.vwsll_vi": 0, + "codegen.vwsll_vv": 0, + "codegen.vwsll_vx": 0, + "codegen.vwsub_vv": 0, + "codegen.vwsub_vx": 0, + "codegen.vwsub_wv": 0, + "codegen.vwsub_wx": 0, + "codegen.vwsubu_vv": 0, + "codegen.vwsubu_vx": 0, + "codegen.vwsubu_wv": 0, + "codegen.vwsubu_wx": 0, + "codegen.vxor_vi": 0, + "codegen.vxor_vv": 0, + "codegen.vxor_vx": 0, + "codegen.vzext_vf2": 0, + "codegen.vzext_vf4": 0, + "intrinsic.conv2d_less_m2": 0, + "intrinsic.linear_normalization": 0, + "intrinsic.softmax": 0, + "mlir.hello": 0, + "mlir.rvv_vp_intrinsic_add": 0, + "mlir.rvv_vp_intrinsic_add_scalable": 0, + "mlir.stripmining": 0, + "rvv_bench.ascii_to_utf16": 0, + "rvv_bench.ascii_to_utf32": 0, + "rvv_bench.byteswap": 0, + "rvv_bench.chacha20": 0, + "rvv_bench.mandelbrot": 0, + "rvv_bench.memcpy": 0, + "rvv_bench.memset": 0, + "rvv_bench.mergelines": 0, + "rvv_bench.poly1305": 0, + "rvv_bench.strlen": 0, + "rvv_bench.utf8_count": 0 +} diff --git a/.github/cases/psyduck/perf.json b/.github/cases/psyduck/perf.json new file mode 100644 index 000000000..3527224f9 --- /dev/null +++ b/.github/cases/psyduck/perf.json @@ -0,0 +1,9 @@ +{ + "mlir.conv": 0, + "mlir.matmul": 0, + "mlir.stripmining": 0, + "intrinsic.conv2d_less_m2": 0, + "intrinsic.linear_normalization": 0, + "intrinsic.matmul": 0, + "intrinsic.softmax": 0 +} diff --git a/tests/codegen/common.txt b/tests/codegen/common.txt index cb27e22f4..506b3a08a 100644 --- a/tests/codegen/common.txt +++ b/tests/codegen/common.txt @@ -11,12 +11,19 @@ vadd.vx vand.vi 
vand.vv vand.vx +vandn.vv +vandn.vx vasub.vv vasub.vx vasubu.vv vasubu.vx +vbrev.v +vbrev8.v +vclz.v vcompress.vm vcpop.m +vcpop.v +vctz.v vdiv.vv vdiv.vx vdivu.vv @@ -24,118 +31,118 @@ vdivu.vx vfirst.m vid.v viota.m -vl1re8.v vl1re16.v vl1re32.v -vl2re8.v +vl1re8.v vl2re16.v vl2re32.v -vl4re8.v +vl2re8.v vl4re16.v vl4re32.v -vl8re8.v +vl4re8.v vl8re16.v vl8re32.v -vle8.v -vle8ff.v +vl8re8.v vle16.v vle16ff.v vle32.v vle32ff.v +vle8.v +vle8ff.v vlm.v -vloxei8.v vloxei16.v vloxei32.v -vloxseg2ei8.v +vloxei8.v vloxseg2ei16.v vloxseg2ei32.v -vloxseg3ei8.v +vloxseg2ei8.v vloxseg3ei16.v vloxseg3ei32.v -vloxseg4ei8.v +vloxseg3ei8.v vloxseg4ei16.v vloxseg4ei32.v -vloxseg5ei8.v +vloxseg4ei8.v vloxseg5ei16.v vloxseg5ei32.v -vloxseg6ei8.v +vloxseg5ei8.v vloxseg6ei16.v vloxseg6ei32.v -vloxseg7ei8.v +vloxseg6ei8.v vloxseg7ei16.v vloxseg7ei32.v -vloxseg8ei8.v +vloxseg7ei8.v vloxseg8ei16.v vloxseg8ei32.v -vlse8.v +vloxseg8ei8.v vlse16.v vlse32.v -vlseg2e8.v +vlse8.v vlseg2e16.v vlseg2e32.v -vlseg3e8.v +vlseg2e8.v vlseg3e16.v vlseg3e32.v -vlseg4e8.v +vlseg3e8.v vlseg4e16.v vlseg4e32.v -vlseg5e8.v +vlseg4e8.v vlseg5e16.v vlseg5e32.v -vlseg6e8.v +vlseg5e8.v vlseg6e16.v vlseg6e32.v -vlseg7e8.v +vlseg6e8.v vlseg7e16.v vlseg7e32.v -vlseg8e8.v +vlseg7e8.v vlseg8e16.v vlseg8e32.v -vlsseg2e8.v +vlseg8e8.v vlsseg2e16.v vlsseg2e32.v -vlsseg3e8.v +vlsseg2e8.v vlsseg3e16.v vlsseg3e32.v -vlsseg4e8.v +vlsseg3e8.v vlsseg4e16.v vlsseg4e32.v -vlsseg5e8.v +vlsseg4e8.v vlsseg5e16.v vlsseg5e32.v -vlsseg6e8.v +vlsseg5e8.v vlsseg6e16.v vlsseg6e32.v -vlsseg7e8.v +vlsseg6e8.v vlsseg7e16.v vlsseg7e32.v -vlsseg8e8.v +vlsseg7e8.v vlsseg8e16.v vlsseg8e32.v -vluxei8.v +vlsseg8e8.v vluxei16.v vluxei32.v -vluxseg2ei8.v +vluxei8.v vluxseg2ei16.v vluxseg2ei32.v -vluxseg3ei8.v +vluxseg2ei8.v vluxseg3ei16.v vluxseg3ei32.v -vluxseg4ei8.v +vluxseg3ei8.v vluxseg4ei16.v vluxseg4ei32.v -vluxseg5ei8.v +vluxseg4ei8.v vluxseg5ei16.v vluxseg5ei32.v -vluxseg6ei8.v +vluxseg5ei8.v vluxseg6ei16.v vluxseg6ei32.v -vluxseg7ei8.v 
+vluxseg6ei8.v vluxseg7ei16.v vluxseg7ei32.v -vluxseg8ei8.v +vluxseg7ei8.v vluxseg8ei16.v vluxseg8ei32.v +vluxseg8ei8.v vmacc.vv vmacc.vx vmadc.vi @@ -242,10 +249,16 @@ vrem.vv vrem.vx vremu.vv vremu.vx +vrev8.v vrgather.vi vrgather.vv vrgather.vx vrgatherei16.vv +vrol.vv +vrol.vx +vror.vi +vror.vv +vror.vx vrsub.vi vrsub.vx vs1r.v @@ -260,9 +273,9 @@ vsaddu.vv vsaddu.vx vsbc.vvm vsbc.vxm -vse8.v vse16.v vse32.v +vse8.v vsetivli vsetvl vsetvli @@ -280,117 +293,117 @@ vsll.vx vsm.v vsmul.vv vsmul.vx -vsoxei8.v vsoxei16.v vsoxei32.v -vsoxseg2ei8.v +vsoxei8.v vsoxseg2ei16.v vsoxseg2ei32.v -vsoxseg3ei8.v +vsoxseg2ei8.v vsoxseg3ei16.v vsoxseg3ei32.v -vsoxseg4ei8.v +vsoxseg3ei8.v vsoxseg4ei16.v vsoxseg4ei32.v -vsoxseg5ei8.v +vsoxseg4ei8.v vsoxseg5ei16.v vsoxseg5ei32.v -vsoxseg6ei8.v +vsoxseg5ei8.v vsoxseg6ei16.v vsoxseg6ei32.v -vsoxseg7ei8.v +vsoxseg6ei8.v vsoxseg7ei16.v vsoxseg7ei32.v -vsoxseg8ei8.v +vsoxseg7ei8.v vsoxseg8ei16.v vsoxseg8ei32.v +vsoxseg8ei8.v vsra.vi vsra.vv vsra.vx vsrl.vi vsrl.vv vsrl.vx -vsse8.v vsse16.v vsse32.v -vsseg2e8.v +vsse8.v vsseg2e16.v vsseg2e32.v -vsseg3e8.v +vsseg2e8.v vsseg3e16.v vsseg3e32.v -vsseg4e8.v +vsseg3e8.v vsseg4e16.v vsseg4e32.v -vsseg5e8.v +vsseg4e8.v vsseg5e16.v vsseg5e32.v -vsseg6e8.v +vsseg5e8.v vsseg6e16.v vsseg6e32.v -vsseg7e8.v +vsseg6e8.v vsseg7e16.v vsseg7e32.v -vsseg8e8.v +vsseg7e8.v vsseg8e16.v vsseg8e32.v +vsseg8e8.v vssra.vi vssra.vv vssra.vx vssrl.vi vssrl.vv vssrl.vx -vssseg2e8.v vssseg2e16.v vssseg2e32.v -vssseg3e8.v +vssseg2e8.v vssseg3e16.v vssseg3e32.v -vssseg4e8.v +vssseg3e8.v vssseg4e16.v vssseg4e32.v -vssseg5e8.v +vssseg4e8.v vssseg5e16.v vssseg5e32.v -vssseg6e8.v +vssseg5e8.v vssseg6e16.v vssseg6e32.v -vssseg7e8.v +vssseg6e8.v vssseg7e16.v vssseg7e32.v -vssseg8e8.v +vssseg7e8.v vssseg8e16.v vssseg8e32.v +vssseg8e8.v vssub.vv vssub.vx vssubu.vv vssubu.vx vsub.vv vsub.vx -vsuxei8.v vsuxei16.v vsuxei32.v -vsuxseg2ei8.v +vsuxei8.v vsuxseg2ei16.v vsuxseg2ei32.v -vsuxseg3ei8.v +vsuxseg2ei8.v vsuxseg3ei16.v 
vsuxseg3ei32.v -vsuxseg4ei8.v +vsuxseg3ei8.v vsuxseg4ei16.v vsuxseg4ei32.v -vsuxseg5ei8.v +vsuxseg4ei8.v vsuxseg5ei16.v vsuxseg5ei32.v -vsuxseg6ei8.v +vsuxseg5ei8.v vsuxseg6ei16.v vsuxseg6ei32.v -vsuxseg7ei8.v +vsuxseg6ei8.v vsuxseg7ei16.v vsuxseg7ei32.v -vsuxseg8ei8.v +vsuxseg7ei8.v vsuxseg8ei16.v vsuxseg8ei32.v +vsuxseg8ei8.v vwadd.vv vwadd.vx vwadd.wv @@ -414,6 +427,9 @@ vwmulu.vv vwmulu.vx vwredsum.vs vwredsumu.vs +vwsll.vi +vwsll.vv +vwsll.vx vwsub.vv vwsub.vx vwsub.wv From 237fbb5eab109a90101c4df7e5400e2cdc997abf Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 1 Aug 2024 15:49:55 +0800 Subject: [PATCH 005/140] [nix] fix codegen compile for zvbb insn Signed-off-by: Avimitin --- tests/builder.nix | 30 +++++++++++++++++++----------- tests/codegen/common.txt | 16 ---------------- tests/codegen/default.nix | 5 ++--- tests/codegen/zvbb.txt | 2 +- tests/default.nix | 5 +---- 5 files changed, 23 insertions(+), 35 deletions(-) diff --git a/tests/builder.nix b/tests/builder.nix index f250a8b13..4730af191 100644 --- a/tests/builder.nix +++ b/tests/builder.nix @@ -26,17 +26,25 @@ let CC = "${stdenv.targetPlatform.config}-cc"; - NIX_CFLAGS_COMPILE = [ - "-mabi=ilp32f" - "-march=${rtlDesignMetadata.march}" - "-mno-relax" - "-static" - "-mcmodel=medany" - "-fvisibility=hidden" - "-fno-PIC" - "-g" - "-O3" - ]; + NIX_CFLAGS_COMPILE = + let + march = lib.pipe rtlDesignMetadata.march [ + (lib.splitString "_") + (map (ext: if ext == "zvbb" then "zvbb1" else ext)) + (lib.concatStringsSep "_") + ]; + in + [ + "-mabi=ilp32f" + "-march=${march}" + "-mno-relax" + "-static" + "-mcmodel=medany" + "-fvisibility=hidden" + "-fno-PIC" + "-g" + "-O3" + ] ++ lib.optionals (lib.elem "zvbb" (lib.splitString "_" rtlDesignMetadata.march)) [ "-menable-experimental-extensions" ]; installPhase = '' runHook preInstall diff --git a/tests/codegen/common.txt b/tests/codegen/common.txt index 506b3a08a..98fc1ae39 100644 --- a/tests/codegen/common.txt +++ b/tests/codegen/common.txt @@ -11,19 +11,12 @@ 
vadd.vx vand.vi vand.vv vand.vx -vandn.vv -vandn.vx vasub.vv vasub.vx vasubu.vv vasubu.vx -vbrev.v -vbrev8.v -vclz.v vcompress.vm vcpop.m -vcpop.v -vctz.v vdiv.vv vdiv.vx vdivu.vv @@ -249,16 +242,10 @@ vrem.vv vrem.vx vremu.vv vremu.vx -vrev8.v vrgather.vi vrgather.vv vrgather.vx vrgatherei16.vv -vrol.vv -vrol.vx -vror.vi -vror.vv -vror.vx vrsub.vi vrsub.vx vs1r.v @@ -427,9 +414,6 @@ vwmulu.vv vwmulu.vx vwredsum.vs vwredsumu.vs -vwsll.vi -vwsll.vv -vwsll.vx vwsub.vv vwsub.vx vwsub.wv diff --git a/tests/codegen/default.nix b/tests/codegen/default.nix index fd8edb612..e4883ade6 100644 --- a/tests/codegen/default.nix +++ b/tests/codegen/default.nix @@ -74,11 +74,10 @@ let commonTests = buildTestsFromFile ./common.txt { featuresRequired = [ ]; }; fpTests = buildTestsFromFile ./fp.txt { featuresRequired = [ "zve32f" ]; }; zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = [ "zvbb" ]; }; - hasFeature = feat: lib.any (f: feat == f) currentFeatures; in lib.recurseIntoAttrs ( commonTests // - lib.optionalAttrs (hasFeature "zve32f") fpTests // - lib.optionalAttrs (hasFeature "zvbb") zvbbTests + lib.optionalAttrs (lib.elem "zve32f" currentFeatures) fpTests // + lib.optionalAttrs (lib.elem "zvbb" currentFeatures) zvbbTests ) diff --git a/tests/codegen/zvbb.txt b/tests/codegen/zvbb.txt index 77ed67621..d109f5570 100644 --- a/tests/codegen/zvbb.txt +++ b/tests/codegen/zvbb.txt @@ -1,7 +1,7 @@ vandn.vv vandn.vx vbrev.v -vbreav8.v +vbrev8.v vclz.v vcpop.v vctz.v diff --git a/tests/default.nix b/tests/default.nix index 8a607d59e..35fbc2ab8 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -11,8 +11,6 @@ }: let - hasExt = cmp: lib.any (ext: cmp == (lib.toLower ext)) rtlDesignMetadata.extensions; - # Add an extra abstract layer between test case and RTL design, so that we can have clean and organized way # for developer to specify their required features without the need to parse ISA string themselves. 
currentFeatures = [ @@ -20,8 +18,7 @@ let "dlen:${rtlDesignMetadata.dlen}" "xlen:${if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then "32" else "64"}" ] - ++ lib.optionals (hasExt "zve32f") [ "zve32f" ] - ++ lib.optionals (hasExt "zvbb") [ "zvbb" ]; + ++ (lib.splitString "_" rtlDesignMetadata.march); # isSubSetOf m n: n is subset of m isSubsetOf = m: n: lib.all (x: lib.elem x m) n; From 766c5e37ed465cbeebe21735d6f9044f12a645e0 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 2 Aug 2024 16:00:16 +0800 Subject: [PATCH 006/140] [ci] dont patch elf for VCS result --- tests/make-emu-result.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/make-emu-result.nix b/tests/make-emu-result.nix index a8540f806..5507e15f4 100644 --- a/tests/make-emu-result.nix +++ b/tests/make-emu-result.nix @@ -108,6 +108,7 @@ let passthru.with-vcs = self.overrideAttrs (old: { name = old.name + "-with-vcs"; __noChroot = true; + dontPatchELF = true; buildPhase = '' runHook preBuild @@ -143,6 +144,7 @@ let passthru.with-vcs-trace = self.overrideAttrs (old: { name = old.name + "-with-vcs-trace"; __noChroot = true; + dontPatchELF = true; buildPhase = '' runHook preBuild From 755f0f1dccd11142fe017569cf8544de7f16380d Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 6 Aug 2024 21:35:33 +0000 Subject: [PATCH 007/140] [deps] Bump T1 dependencies --- nix/t1/_sources/generated.json | 8 ++++---- nix/t1/_sources/generated.nix | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nix/t1/_sources/generated.json b/nix/t1/_sources/generated.json index 20f84a5e5..b11295822 100644 --- a/nix/t1/_sources/generated.json +++ b/nix/t1/_sources/generated.json @@ -41,7 +41,7 @@ }, "chisel": { "cargoLocks": null, - "date": "2024-08-02", + "date": "2024-08-06", "extract": null, "name": "chisel", "passthru": null, @@ -53,11 +53,11 @@ "name": null, "owner": "chipsalliance", "repo": "chisel", - "rev": "557bc5064afb34124a39e9a3677f1e647306b3f6", - "sha256": 
"sha256-ZYk76WOd4OZrimiWRw6TV/QQ/zy3u9blwwqTAMRs5uk=", + "rev": "e6df4cf02fb68191ea76387f046dc63f7cef9820", + "sha256": "sha256-l4hNHDwWYnTC1T8yH58UIFVKszdDOmstK5ljk0R1nhg=", "type": "github" }, - "version": "557bc5064afb34124a39e9a3677f1e647306b3f6" + "version": "e6df4cf02fb68191ea76387f046dc63f7cef9820" }, "chisel-interface": { "cargoLocks": null, diff --git a/nix/t1/_sources/generated.nix b/nix/t1/_sources/generated.nix index 2f4406549..7f390ca9c 100644 --- a/nix/t1/_sources/generated.nix +++ b/nix/t1/_sources/generated.nix @@ -27,15 +27,15 @@ }; chisel = { pname = "chisel"; - version = "557bc5064afb34124a39e9a3677f1e647306b3f6"; + version = "e6df4cf02fb68191ea76387f046dc63f7cef9820"; src = fetchFromGitHub { owner = "chipsalliance"; repo = "chisel"; - rev = "557bc5064afb34124a39e9a3677f1e647306b3f6"; + rev = "e6df4cf02fb68191ea76387f046dc63f7cef9820"; fetchSubmodules = false; - sha256 = "sha256-ZYk76WOd4OZrimiWRw6TV/QQ/zy3u9blwwqTAMRs5uk="; + sha256 = "sha256-l4hNHDwWYnTC1T8yH58UIFVKszdDOmstK5ljk0R1nhg="; }; - date = "2024-08-02"; + date = "2024-08-06"; }; chisel-interface = { pname = "chisel-interface"; From 83f60997b21d8e53ff0669a50435b229c7276c22 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 6 Aug 2024 17:58:14 +0800 Subject: [PATCH 008/140] [nix] add standalone LLVM build for buddy-mlir Signed-off-by: Avimitin --- nix/pkgs/buddy-llvm.nix | 76 +++++++++++++++++++++++++++++++++++++++++ nix/pkgs/buddy-mlir.nix | 50 ++++++++++----------------- 2 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 nix/pkgs/buddy-llvm.nix diff --git a/nix/pkgs/buddy-llvm.nix b/nix/pkgs/buddy-llvm.nix new file mode 100644 index 000000000..af5bc1c86 --- /dev/null +++ b/nix/pkgs/buddy-llvm.nix @@ -0,0 +1,76 @@ +{ stdenv +, cmake +, ninja +, python3 +, fetchFromGitHub +}: + +let + pythonEnv = python3.withPackages (ps: [ + ps.numpy + ps.pybind11 + ps.pyyaml + ps.ml-dtypes + ]); +in +stdenv.mkDerivation rec { + name = "llvm-for-buddy-mlir"; + version = 
"6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0"; + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + rev = version; + hash = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw="; + }; + + requiredSystemFeatures = [ "big-parallel" ]; + + propagatedBuildInputs = [ + pythonEnv + ]; + + nativeBuildInputs = [ + cmake + ninja + ]; + + cmakeDir = "../llvm"; + cmakeFlags = [ + "-DLLVM_ENABLE_PROJECTS=mlir" + "-DLLVM_TARGETS_TO_BUILD=host;RISCV" + "-DLLVM_ENABLE_ASSERTIONS=ON" + "-DCMAKE_BUILD_TYPE=Release" + # required for MLIR python binding + "-DMLIR_ENABLE_BINDINGS_PYTHON=ON" + # required for not, FileCheck... + "-DLLVM_INSTALL_UTILS=ON" + ]; + + outputs = [ "out" "lib" "dev" ]; + + postInstall = '' + # buddy-mlir have custom RVV backend that required LLVM backend, + # and those LLVM backend headers require this config.h header file. + # However for LLVM, this config.h is meant to be used on build phase only, + # so it will not be installed for cmake install. + # We have to do some hack + cp -v "include/llvm/Config/config.h" "$dev/include/llvm/Config/config.h" + + # move llvm-config to $dev to resolve a circular dependency + moveToOutput "bin/llvm-config*" "$dev" + + # move all lib files to $lib except lib/cmake + moveToOutput "lib" "$lib" + moveToOutput "lib/cmake" "$dev" + + # patch configuration files so each path points to the new $lib or $dev paths + substituteInPlace "$dev/lib/cmake/llvm/LLVMConfig.cmake" \ + --replace 'set(LLVM_BINARY_DIR "''${LLVM_INSTALL_PREFIX}")' 'set(LLVM_BINARY_DIR "'"$lib"'")' + substituteInPlace \ + "$dev/lib/cmake/llvm/LLVMExports-release.cmake" \ + "$dev/lib/cmake/mlir/MLIRTargets-release.cmake" \ + --replace "\''${_IMPORT_PREFIX}/lib/lib" "$lib/lib/lib" \ + --replace "\''${_IMPORT_PREFIX}/lib/objects-Release" "$lib/lib/objects-Release" \ + --replace "$out/bin/llvm-config" "$dev/bin/llvm-config" # patch path for llvm-config + ''; +} diff --git a/nix/pkgs/buddy-mlir.nix b/nix/pkgs/buddy-mlir.nix index abdae657a..f2f899cea 
100644 --- a/nix/pkgs/buddy-mlir.nix +++ b/nix/pkgs/buddy-mlir.nix @@ -1,14 +1,15 @@ -{ cmake, ninja, python3, llvmPackages_17, fetchFromGitHub, fetchpatch }: +{ cmake +, ninja +, llvmPackages_17 +, fetchFromGitHub +, fetchpatch +, callPackage +}: let stdenv = llvmPackages_17.stdenv; bintools = llvmPackages_17.bintools; - buddy-llvm = fetchFromGitHub { - owner = "llvm"; - repo = "llvm-project"; - rev = "6c59f0e1b0fb56c909ad7c9aad4bde37dc006ae0"; - hash = "sha256-bMJJ2q1hSh7m0ewclHOmIe7lOHv110rz/P7D3pw8Uiw="; - }; + buddy-llvm = callPackage ./buddy-llvm.nix { inherit stdenv; }; in stdenv.mkDerivation { pname = "buddy-mlir"; @@ -17,38 +18,23 @@ stdenv.mkDerivation { src = fetchFromGitHub { owner = "buddy-compiler"; repo = "buddy-mlir"; - rev = "be2811cde9158faa0c08ad90801edf5ebfcf8e0e"; - hash = "sha256-5ZFqDZZjMbVoqbEZ1mt1RXY2oR+VSQ6wJ1dQJCGrRC4="; + rev = "d7d90a488ac0d6fc1e700e932f842c7b2bcad816"; + hash = "sha256-MhykCa6Z7Z8PpAlNh+vMuWYEOZZDyWhtMzMnFlNbGIk="; }; - unpackPhase = '' - # We can only use one-step build now...buddy-mlir have bad build system that always - # assume the build artifacts are inside of the LLVM sources. And it also relies on - # some LLVM Cpp source that are configured to be installed by default. 
- cp -r ${buddy-llvm} llvm-project - cp -r $src buddy-mlir - - # Directories copied from nix store are read only - chmod -R u+w llvm-project buddy-mlir - ''; - sourceRoot = "llvm-project"; - nativeBuildInputs = [ cmake ninja python3 bintools ]; + nativeBuildInputs = [ cmake ninja bintools ]; + buildInputs = [ + buddy-llvm + ]; - cmakeDir = "../llvm"; cmakeFlags = [ + "-DMLIR_DIR=${buddy-llvm.dev}/lib/cmake/mlir" + "-DLLVM_DIR=${buddy-llvm.dev}/lib/cmake/llvm" + "-DLLVM_MAIN_SRC_DIR=${buddy-llvm.src}/llvm" + "-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON" "-DCMAKE_BUILD_TYPE=Release" - "-DLLVM_INSTALL_UTILS=ON" - "-DLLVM_ENABLE_PROJECTS=mlir" - "-DLLVM_TARGETS_TO_BUILD=host;RISCV" - "-DLLVM_ENABLE_ASSERTIONS=ON" - "-DLLVM_USE_LINKER=lld" - - "-DLLVM_EXTERNAL_PROJECTS=buddy-mlir" - "-DLLVM_EXTERNAL_BUDDY_MLIR_SOURCE_DIR=../../buddy-mlir" ]; - passthru.llvm = buddy-llvm; - # No need to do check, and it also takes too much time to finish. doCheck = false; } From 23948baccce8d0f609b1d3d27affb5554b1dbdbe Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 01:21:59 +0800 Subject: [PATCH 009/140] [nix] add Python module support for buddy-mlir Signed-off-by: Avimitin --- nix/overlay.nix | 9 ++++++++- nix/pkgs/buddy-mlir.nix | 19 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/nix/overlay.nix b/nix/overlay.nix index 9306cffd7..e4e226e49 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -19,7 +19,14 @@ rec { dramsim3 = final.callPackage ./pkgs/dramsim3.nix { }; libspike = final.callPackage ./pkgs/libspike.nix { }; libspike_interfaces = final.callPackage ../difftest/spike_interfaces { }; - buddy-mlir = final.callPackage ./pkgs/buddy-mlir.nix { }; + + # DynamoCompiler doesn't support python 3.12+ yet + buddy-mlir = final.callPackage ./pkgs/buddy-mlir.nix { python3 = final.python311; }; + buddy-mlir-pyenv = final.buddy-mlir.pythonModule.withPackages (ps: [ + final.buddy-mlir + ps.torch + ]); + fetchMillDeps = final.callPackage 
./pkgs/mill-builder.nix { }; circt-full = final.callPackage ./pkgs/circt-full.nix { }; rvv-codegen = final.callPackage ./pkgs/rvv-codegen.nix { }; diff --git a/nix/pkgs/buddy-mlir.nix b/nix/pkgs/buddy-mlir.nix index f2f899cea..4ec3a4ff9 100644 --- a/nix/pkgs/buddy-mlir.nix +++ b/nix/pkgs/buddy-mlir.nix @@ -3,13 +3,14 @@ , llvmPackages_17 , fetchFromGitHub , fetchpatch +, python3 , callPackage }: let stdenv = llvmPackages_17.stdenv; bintools = llvmPackages_17.bintools; - buddy-llvm = callPackage ./buddy-llvm.nix { inherit stdenv; }; + buddy-llvm = callPackage ./buddy-llvm.nix { inherit stdenv python3; }; in stdenv.mkDerivation { pname = "buddy-mlir"; @@ -37,4 +38,20 @@ stdenv.mkDerivation { # No need to do check, and it also takes too much time to finish. doCheck = false; + + # Here we concatenate the LLVM and Buddy python module into one directory for easier import + postFixup = '' + mkdir -p $out/lib/python${python3.pythonVersion}/site-packages + cp -vr $out/python_packages/buddy $out/lib/python${python3.pythonVersion}/site-packages/ + cp -vr ${buddy-llvm}/python_packages/mlir_core/mlir $out/lib/python${python3.pythonVersion}/site-packages/ + ''; + + passthru = { + llvm = buddy-llvm; + + # Below three fields are black magic that allow site-packages automatically imported with nixpkgs hooks + pythonModule = python3; + pythonPath = [ ]; + requiredPythonModules = [ ]; + }; } From f2291a66b32b26bec0008a616383adacddfc7894 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 01:22:50 +0800 Subject: [PATCH 010/140] [pytorch] add demo workload Signed-off-by: Avimitin --- tests/pytorch/demo/demo.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/pytorch/demo/demo.py diff --git a/tests/pytorch/demo/demo.py b/tests/pytorch/demo/demo.py new file mode 100644 index 000000000..f189e5ebb --- /dev/null +++ b/tests/pytorch/demo/demo.py @@ -0,0 +1,31 @@ +import torch +import torch._dynamo as dynamo +from 
torch._inductor.decomposition import decompositions as inductor_decomp + +from buddy.compiler.frontend import DynamoCompiler +from buddy.compiler.ops import tosa + +# Define the target function or model. +def foo(x, y): + return x * y + x + +# Define the input data. +float32_in1 = torch.randn(10).to(torch.float32) +float32_in2 = torch.randn(10).to(torch.float32) +int32_in1 = torch.randint(0, 10, (10,)).to(torch.int32) +int32_in2 = torch.randint(0, 10, (10,)).to(torch.int32) + +# Initialize the dynamo compiler. +dynamo_compiler = DynamoCompiler( + primary_registry=tosa.ops_registry, + aot_autograd_decomposition=inductor_decomp, +) + +# Pass the function and input data to the dynamo compiler's importer, the +# importer will first build a graph. Then, lower the graph to top-level IR. +# (tosa, linalg, etc.). Finally, accepts the generated module and weight parameters. +graphs = dynamo_compiler.importer(foo, *(float32_in1, float32_in2)) +graph = graphs[0] +graph.lower_to_top_level_ir() + +print(graph._imported_module) From f2970b483f2edbb63408ee50d49b2b5d535aece3 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 15:03:51 +0800 Subject: [PATCH 011/140] [pytorch] use file write instead of print Signed-off-by: Avimitin --- tests/pytorch/demo/demo.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/pytorch/demo/demo.py b/tests/pytorch/demo/demo.py index f189e5ebb..650227eca 100644 --- a/tests/pytorch/demo/demo.py +++ b/tests/pytorch/demo/demo.py @@ -10,10 +10,8 @@ def foo(x, y): return x * y + x # Define the input data. -float32_in1 = torch.randn(10).to(torch.float32) -float32_in2 = torch.randn(10).to(torch.float32) -int32_in1 = torch.randint(0, 10, (10,)).to(torch.int32) -int32_in2 = torch.randint(0, 10, (10,)).to(torch.int32) +float32_in1 = torch.randn(512).to(torch.float32) +float32_in2 = torch.randn(512).to(torch.float32) # Initialize the dynamo compiler. 
dynamo_compiler = DynamoCompiler( @@ -28,4 +26,5 @@ def foo(x, y): graph = graphs[0] graph.lower_to_top_level_ir() -print(graph._imported_module) +with open("forward.mlir", "w") as mlir_module: + print(graph._imported_module, file = mlir_module) From a527de3c20047d796922c95b5cfca43f025a8be4 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 15:11:35 +0800 Subject: [PATCH 012/140] [tests] fix trace emu result Signed-off-by: Avimitin --- tests/make-emu-result.nix | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/make-emu-result.nix b/tests/make-emu-result.nix index 5507e15f4..1df18c8db 100644 --- a/tests/make-emu-result.nix +++ b/tests/make-emu-result.nix @@ -21,8 +21,8 @@ let dontUnpack = true; - difftestDriver = "${verilator-emu}/bin/online_drive"; - difftestArgs = [ + emuDriver = "${verilator-emu}/bin/online_drive"; + emuDriverArgs = [ "--elf-file" "${testCase}/bin/${testCase.pname}.elf" "--log-file" @@ -37,9 +37,9 @@ let mkdir -p "$out" - echo "[nix] Running test case ${testCase.pname} with args $difftestArgs" + echo "[nix] Running test case ${testCase.pname} with args $emuDriverArgs" - RUST_BACKTRACE=full "$difftestDriver" $difftestArgs 2> "$rtlEventOutPath" + RUST_BACKTRACE=full "$emuDriver" $emuDriverArgs 2> "$rtlEventOutPath" echo "[nix] online driver done" @@ -83,7 +83,7 @@ let passthru.with-trace = self.overrideAttrs (old: { name = old.name + "-with-trace"; emuDriver = "${verilator-emu-trace}/bin/online_drive"; - emuDriverArgs = old.emuDriverArgs ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; + emuDriverArgs = old.emuDriverArgs or [ ] ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; postCheck = '' if [ ! 
-r "$out/wave.fst" ]; then echo -e "[nix] \033[0;31mInternal Error\033[0m: waveform not found in output" From e9b7c7b8cdd912cc787de722bda36a0f694566d1 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 17:09:54 +0800 Subject: [PATCH 013/140] [tests] add build system for PyTorch test cases --- tests/default.nix | 3 +- tests/pytorch/default.nix | 101 ++++++++++++++++++++++++++++++++++ tests/pytorch/demo/config.nix | 30 ++++++++++ tests/pytorch/demo/demo.c | 39 +++++++++++++ tests/pytorch/memref.h | 28 ++++++++++ 5 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tests/pytorch/default.nix create mode 100644 tests/pytorch/demo/config.nix create mode 100644 tests/pytorch/demo/demo.c create mode 100644 tests/pytorch/memref.h diff --git a/tests/default.nix b/tests/default.nix index 35fbc2ab8..fe062d366 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -76,11 +76,12 @@ let perf = casesSelf.callPackage ./perf { }; codegen = casesSelf.callPackage ./codegen { }; rvv_bench = casesSelf.callPackage ./rvv_bench { }; + pytorch = casesSelf.callPackage ./pytorch { }; })); # remove non-case attributes in scope scopeStripped = { - inherit (scope) mlir intrinsic asm perf codegen rvv_bench; + inherit (scope) mlir intrinsic asm perf codegen rvv_bench pytorch; }; # This derivation is for internal CI use only. 
diff --git a/tests/pytorch/default.nix b/tests/pytorch/default.nix new file mode 100644 index 000000000..2da4a2609 --- /dev/null +++ b/tests/pytorch/default.nix @@ -0,0 +1,101 @@ +{ lib +, linkerScript +, buddy-mlir +, buddy-mlir-pyenv +, makeBuilder +, findAndBuild +, getTestRequiredFeatures +, t1main +}: + +let + + builder = makeBuilder { casePrefix = "mlir"; }; + build = { caseName, sourcePath }: + let + buddyBuildConfig = import (sourcePath + "/config.nix"); + defaultBuddyTranslateArgs = [ "--buddy-to-llvmir" ]; + defaultBuddyLLCArgs = [ + "-mtriple=riscv32" + "-target-abi=ilp32f" + "-mattr=+m,+f,+zve32f" + "-riscv-v-vector-bits-min=128" + ]; + in + builder rec { + inherit caseName; + + src = sourcePath; + + featuresRequired = getTestRequiredFeatures sourcePath; + + nativeBuildInputs = [ buddy-mlir-pyenv buddy-mlir ]; + + pythonArgs = buddyBuildConfig.pythonArgs or [ ]; + buddyTranslateArgs = buddyBuildConfig.buddyTranslateArgs or defaultBuddyTranslateArgs; + buddyLLCArgs = buddyBuildConfig.buddyLLCArgs or defaultBuddyLLCArgs; + buddyIncludes = buddyBuildConfig.includes or [ ]; + + postUnpack = '' + buddyIncludeDir="." + if [ "x$buddyIncludes" != "x" ]; then + mkdir -p buddyInclude + _buddyHeaderArray=( $buddyIncludes ) + for h in "''${_buddyHeaderArray}"; do + cp -v "$h" buddyInclude/"$(stripHash $h)" + done + + buddyIncludeDir=$PWD/buddyInclude + fi + ''; + + buildPhase = '' + runHook preBuild + + echo "Running python with args $pythonArgs" + python $pythonArgs ${caseName}.py + + # Generate multiple buddy-opt call, each will read input from former pipeline + # For example, for buddyOptArgs = [ [ "--arg-a" ], [ "--arg-b" ], [ "--arg-c" ] ] + # This will generate + # + # echo "..." + # buddy-opt forward.mlir --arg-a -o forward-1.mlir + # echo "..." + # buddy-opt forward-1.mlir --arg-b -o forward-2.mlir + # echo "..." 
+ # buddy-opt forward-2.mlir --arg-c -o forward-3.mlir + # + ${lib.concatStringsSep "\n" ( + lib.imap0 + (idx: args: '' + echo "Running buddy-opt with args ${lib.escapeShellArgs args}" + buddy-opt \ + forward${if idx == 0 then "" else "-${toString idx}"}.mlir \ + ${lib.escapeShellArgs args} \ + -o forward-${toString (idx+1)}.mlir + '') + buddyBuildConfig.buddyOptArgs + )} + + # Pick up the last optimized MLIR file + echo "Running buddy-translate with args $buddyTranslateArgs" + buddy-translate forward-${with builtins; toString (length buddyBuildConfig.buddyOptArgs)}.mlir \ + $buddyTranslateArgs -o forward.ll + + echo "Running buddy-llc with args $buddyLLCArgs" + buddy-llc forward.ll $buddyLLCArgs --filetype=obj -o forward.o + + echo "Using include dir $buddyIncludeDir" + $CC -T${linkerScript} \ + -I$buddyIncludeDir \ + ${caseName}.c forward.o ${t1main} \ + -o $pname.elf + + runHook postBuild + ''; + + meta.description = "testcase '${caseName}', written in MLIR"; + }; +in +findAndBuild ./. 
build diff --git a/tests/pytorch/demo/config.nix b/tests/pytorch/demo/config.nix new file mode 100644 index 000000000..d00359f79 --- /dev/null +++ b/tests/pytorch/demo/config.nix @@ -0,0 +1,30 @@ +{ + includes = [ + ../memref.h + ]; + + buddyOptArgs = [ + [ + "--pass-pipeline" + "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" + ] + [ + "--pass-pipeline" + "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers))" + ] + [ + "--lower-affine" + "--convert-math-to-llvm" + "--convert-math-to-libm" + "--convert-scf-to-cf" + "--convert-arith-to-llvm" + "--expand-strided-metadata" + "--finalize-memref-to-llvm" + "--lower-vector-exp" + "--lower-rvv=rv32" + "--convert-vector-to-llvm" + "--convert-func-to-llvm" + "--reconcile-unrealized-casts" + ] + ]; +} diff --git a/tests/pytorch/demo/demo.c b/tests/pytorch/demo/demo.c new file mode 100644 index 000000000..6b8b971bb --- /dev/null +++ b/tests/pytorch/demo/demo.c @@ -0,0 +1,39 @@ +#include "memref.h" + +NEW_MEMREF(float, 1); + +extern void _mlir_ciface_forward(struct MemRef_float_dim1 *output, + struct MemRef_float_dim1 *arg1, + struct MemRef_float_dim1 *arg2); + +__attribute((section(".vdata"))) float input_float_0[512] = {1, 2, 3}; +struct MemRef_float_dim1 input1 = { + .allocatedPtr = input_float_0, + .alignedPtr = input_float_0, + .offset = 0, + .sizes = {512}, + .strides = {1}, +}; + +__attribute((section(".vdata"))) float input_float_1[512] = {4, 5, 6}; +struct MemRef_float_dim1 input2 = { + .allocatedPtr = input_float_1, + .alignedPtr = input_float_1, + .offset = 0, + .sizes = {512}, + .strides = {1}, +}; + +__attribute((section(".vdata"))) float output_float_0[512]; +struct MemRef_float_dim1 output = { + .allocatedPtr = output_float_0, + 
.alignedPtr = output_float_0, + .offset = 0, + .sizes = {512}, + .strides = {1}, +}; + +int test() { + _mlir_ciface_forward(&output, &input1, &input2); + return 0; +} diff --git a/tests/pytorch/memref.h b/tests/pytorch/memref.h new file mode 100644 index 000000000..c468af15f --- /dev/null +++ b/tests/pytorch/memref.h @@ -0,0 +1,28 @@ +#ifndef MEMREF_H +#define MEMREF_H + +// Generate a new struct with T-type, N-dimension memref with name +// MemRef_T_dimN. +// +// Example: +// +// NEW_MEMREF(float, 2); +// // Equals to +// struct MemRef_float_dim2 { +// float *allocatedPtr; +// float *alignedPtr; +// int offset; +// int sizes[2]; +// int strides[2]; +// }; +// +#define NEW_MEMREF(T, N) \ + struct MemRef_##T##_dim##N { \ + T *allocatedPtr; \ + T *alignedPtr; \ + int offset; \ + int sizes[N]; \ + int strides[N]; \ + } + +#endif From 73e22ab4dd27a8baeaade014348a636aacb42485 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 17:35:06 +0800 Subject: [PATCH 014/140] [doc] add document for creating PyTorch tests --- tests/pytorch/README.md | 140 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tests/pytorch/README.md diff --git a/tests/pytorch/README.md b/tests/pytorch/README.md new file mode 100644 index 000000000..cf638e951 --- /dev/null +++ b/tests/pytorch/README.md @@ -0,0 +1,140 @@ +## How to add tests + +To create a new PyTorch test, you can follow the below instruction. + +Assuming that the new PyTorch test have project name call `demo`, let's create the test skeleton: + +```bash +cd tests/pytorch +mkdir -p demo +cd demo +touch demo.c demo.py config.nix +``` + +Developers should put their PyTorch implementation into ".py" file. +For each PyTorch tests, developers must write the MLIR model to "forward.mlir" file. + +```python +# demo.py +#... 
+with open("forward.mlir", "w") as mlir_module: + print(graph._imported_module, file = mlir_module) +``` + +For each PyTorch tests, developers should call the MLIR model from ".c" file. +In our case, here is an example "demo.c" file: + +```c +// 1. Include the MemRef wrapper +#include "memref.h" + +// 2. Create corresponding MemRef struct with data type `float` and one dimension. +NEW_MEMREF(float, 1); + +// 3. Declare the MLIR model interface +extern void _mlir_ciface_forward(struct MemRef_float_dim1 *output, + struct MemRef_float_dim1 *arg1, + struct MemRef_float_dim1 *arg2); + +// 4. Create example data array. The ".vdata" attribute will help emulator load the data into correct memory. +__attribute((section(".vdata"))) float input_float_0[512] = {1, 2, 3}; +struct MemRef_float_dim1 input1 = { + .allocatedPtr = input_float_0, + .alignedPtr = input_float_0, + .offset = 0, + .sizes = {512}, + .strides = {1}, +}; + +// 5. Declare the main entry. In t1 all tests entry should be `int test()` instead of main(). +int test() { + _mlir_ciface_forward(&output, &input1, &input2); + return 0; +} +``` + +After PyTorch model and the C entry is correctly created, developers should declare a "config.nix" +file to indicate our build system to find and build the test case: + +```nix +{ + # Tell our build system to include the memref.h header. + # Developer could add extra headers here. 
+ includes = [ + ../memref.h + ]; + + # Tell the build system to run buddy-opt with three phrase, with arguments to run in each phrase + buddyOptArgs = [ + [ + "--pass-pipeline" + "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith), empty-tensor-to-alloc-tensor, convert-elementwise-to-linalg, arith-bufferize, func.func(linalg-bufferize, tensor-bufferize), func-bufferize)" + ] + [ + "--pass-pipeline" + "builtin.module(func.func(buffer-deallocation-simplification, convert-linalg-to-loops), eliminate-empty-tensors, func.func(llvm-request-c-wrappers))" + ] + [ + "--lower-affine" + "--convert-math-to-llvm" + "--convert-math-to-libm" + "--convert-scf-to-cf" + "--convert-arith-to-llvm" + "--expand-strided-metadata" + "--finalize-memref-to-llvm" + "--lower-vector-exp" + "--lower-rvv=rv32" + "--convert-vector-to-llvm" + "--convert-func-to-llvm" + "--reconcile-unrealized-casts" + ] + ]; +} +``` + +Our build system accept the below data layout for the "config.nix" file: + +```text +Set { + buddyOptArgs: Array>, + + includes: Optional>, + pythonArgs: Optional>, + buddyTranslateArgs: Optional>, + buddyLLCArgs: Optional>, +} +``` + +After the project have been implemented, developers can run the below commands to build and test the ELF: + +```bash +git add . 
+nix build '.#t1.blastoise.ip.cases.pytorch.demo' -L +ls ./result/bin/pytorch-demo.elf + +# To start the emulator and get waveform, run: +nix build '.#t1.blastoise.ip.cases.pytorch.demo.emu-result.with-trace' -L +``` + +## FAQ + +* How to debug the PyTorch code + +```bash +nix run '.#buddy-mlir-pyenv' -- demo.py +``` + +* How to run buddy compiler tools manually + +```bash +nix develop '.#t1.blastoise.ip.cases.pytorch.demo' -L +cd $(mktemp -d -t 'pytorch-debug-XXX') +pwd + +# Unpack sources +unpackPhase +# Check commands: +echo -e "$buildPhase" +# Run build +bash -c "$buildPhase" +``` From e196ba7c52ab05469e3eac8bf42ddf475972ea7f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 17:36:24 +0800 Subject: [PATCH 015/140] [ci] add PyTorch tests into CI Only add for blastoise cuz the pytorch.demo tests required zve32f. --- .github/cases/blastoise/default.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 68280a391..316f7d8dc 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -1,4 +1,5 @@ { + "pytorch.demo": -1, "mlir.rvv_vp_intrinsic_add": 436, "mlir.rvv_vp_intrinsic_add_scalable": 584, "mlir.hello": 146, From 87136b8c4120b1647d871ccc58712ea1fb24a6f5 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Thu, 8 Aug 2024 21:35:26 +0000 Subject: [PATCH 016/140] [deps] Bump T1 dependencies --- nix/t1/_sources/generated.json | 8 ++++---- nix/t1/_sources/generated.nix | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nix/t1/_sources/generated.json b/nix/t1/_sources/generated.json index b11295822..26e124b0b 100644 --- a/nix/t1/_sources/generated.json +++ b/nix/t1/_sources/generated.json @@ -41,7 +41,7 @@ }, "chisel": { "cargoLocks": null, - "date": "2024-08-06", + "date": "2024-08-07", "extract": null, "name": "chisel", "passthru": null, @@ -53,11 +53,11 @@ "name": null, "owner": "chipsalliance", "repo": "chisel", - 
"rev": "e6df4cf02fb68191ea76387f046dc63f7cef9820", - "sha256": "sha256-l4hNHDwWYnTC1T8yH58UIFVKszdDOmstK5ljk0R1nhg=", + "rev": "8572c28dbcc54a2f20ade462028ed50a2f3209b8", + "sha256": "sha256-j76XKUrUCI1g7fgcnaPWgU9KcI2a8T1jOSezOdRpkKI=", "type": "github" }, - "version": "e6df4cf02fb68191ea76387f046dc63f7cef9820" + "version": "8572c28dbcc54a2f20ade462028ed50a2f3209b8" }, "chisel-interface": { "cargoLocks": null, diff --git a/nix/t1/_sources/generated.nix b/nix/t1/_sources/generated.nix index 7f390ca9c..f74d22375 100644 --- a/nix/t1/_sources/generated.nix +++ b/nix/t1/_sources/generated.nix @@ -27,15 +27,15 @@ }; chisel = { pname = "chisel"; - version = "e6df4cf02fb68191ea76387f046dc63f7cef9820"; + version = "8572c28dbcc54a2f20ade462028ed50a2f3209b8"; src = fetchFromGitHub { owner = "chipsalliance"; repo = "chisel"; - rev = "e6df4cf02fb68191ea76387f046dc63f7cef9820"; + rev = "8572c28dbcc54a2f20ade462028ed50a2f3209b8"; fetchSubmodules = false; - sha256 = "sha256-l4hNHDwWYnTC1T8yH58UIFVKszdDOmstK5ljk0R1nhg="; + sha256 = "sha256-j76XKUrUCI1g7fgcnaPWgU9KcI2a8T1jOSezOdRpkKI="; }; - date = "2024-08-06"; + date = "2024-08-07"; }; chisel-interface = { pname = "chisel-interface"; From 2dc5b540a79e317754032e0a23c3885065ef20b9 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Tue, 6 Aug 2024 11:45:30 +0800 Subject: [PATCH 017/140] [ipemu] refactor probe signatures to improve readability --- t1/src/Lane.scala | 29 ++++++++++++----------------- t1/src/T1.scala | 18 +++++++++--------- t1/src/vrf/VRF.scala | 16 ++++++++-------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index aeb388ae8..49ddff3ee 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -27,7 +27,7 @@ class LaneOM extends Class { vfus := vfusIn } -class LaneSlotProbe(instructionIndexBit: Int) extends Bundle { +class LaneSlotProbe(instructionIndexBits: Int) extends Bundle { val stage0EnqueueReady: Bool = Bool() val stage0EnqueueValid: Bool = Bool() val 
changingMaskSet: Bool = Bool() @@ -44,17 +44,17 @@ class LaneSlotProbe(instructionIndexBit: Int) extends Bundle { // write queue enq for lane val writeQueueEnq: Bool = Bool() - val writeTag: UInt = UInt(instructionIndexBit.W) + val writeTag: UInt = UInt(instructionIndexBits.W) val writeMask: UInt = UInt(4.W) } -class LaneWriteProbe(instructionIndexBit: Int) extends Bundle { - val writeTag: UInt = UInt(instructionIndexBit.W) +class LaneWriteProbe(instructionIndexBits: Int) extends Bundle { + val writeTag: UInt = UInt(instructionIndexBits.W) val writeMask: UInt = UInt(4.W) } -class LaneProbe(slotsSize: Int, instructionIndexBit: Int) extends Bundle { - val slots = Vec(slotsSize, new LaneSlotProbe(instructionIndexBit)) +class LaneProbe(parameter: LaneParameter) extends Bundle { + val slots = Vec(parameter.chainingSize, new LaneSlotProbe(parameter.instructionIndexBits)) // @todo @Clo91eaf remove valid here, add stall := valid & !ready val laneRequestValid: Bool = Bool() // @todo remove it. @@ -63,10 +63,10 @@ class LaneProbe(slotsSize: Int, instructionIndexBit: Int) extends Bundle { val lastSlotOccupied: Bool = Bool() // @todo replace it with VRFProbe val vrfInstructionWriteReportReady: Bool = Bool() - val instructionFinished: UInt = UInt(slotsSize.W) - val instructionValid: UInt = UInt(slotsSize.W) + val instructionFinished: UInt = UInt(parameter.chainingSize.W) + val instructionValid: UInt = UInt(parameter.chainingSize.W) - val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(instructionIndexBit))) + val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(parameter.instructionIndexBits))) } object LaneParameter { @@ -314,16 +314,11 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val vrfReadyToStore: Bool = IO(Output(Bool())) @public - val probe: LaneProbe = IO(Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits)))) - val probeWire: LaneProbe = 
Wire(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits)) + val probe: LaneProbe = IO(Output(Probe(new LaneProbe(parameter)))) + val probeWire: LaneProbe = Wire(new LaneProbe(parameter)) define(probe, ProbeValue(probeWire)) @public - val vrfProbe = IO(Output(Probe(new VRFProbe( - parameter.vrfParam.regNumBits, - parameter.vrfOffsetBits, - parameter.instructionIndexBits, - parameter.datapathWidth - )))) + val vrfProbe = IO(Output(Probe(new VRFProbe(parameter.vrfParam)))) @public val vrfAllocateIssue: Bool = IO(Output(Bool())) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 1395dd89b..af110821c 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -292,18 +292,18 @@ case class T1Parameter( def adderParam: LaneAdderParam = LaneAdderParam(datapathWidth, 0) } -class T1Probe(param: T1Parameter) extends Bundle { - val instructionCounter: UInt = UInt(param.instructionIndexBits.W) +class T1Probe(parameter: T1Parameter) extends Bundle { + val instructionCounter: UInt = UInt(parameter.instructionIndexBits.W) val instructionIssue: Bool = Bool() - val issueTag: UInt = UInt(param.instructionIndexBits.W) + val issueTag: UInt = UInt(parameter.instructionIndexBits.W) val retireValid: Bool = Bool() // write queue enq for mask unit - val writeQueueEnq: ValidIO[UInt] = Valid(UInt(param.instructionIndexBits.W)) - val writeQueueEnqMask: UInt = UInt((param.datapathWidth / 8).W) + val writeQueueEnq: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W)) + val writeQueueEnqMask: UInt = UInt((parameter.datapathWidth / 8).W) // mask unit instruction valid - val instructionValid: UInt = UInt((param.chainingSize * 2).W) + val instructionValid: UInt = UInt((parameter.chainingSize * 2).W) // instruction index for check rd - val responseCounter: UInt = UInt(param.instructionIndexBits.W) + val responseCounter: UInt = UInt(parameter.instructionIndexBits.W) } class T1Interface(parameter: T1Parameter) extends Record { @@ -332,10 +332,10 @@ class T1Interface(parameter: 
T1Parameter) extends Record { "t1Probe" -> Output(Probe(new T1Probe(parameter))), ) ++ Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))) + i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.laneParam))) ) ++ Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, parameter.laneParam.instructionIndexBits, parameter.laneParam.datapathWidth))) + i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam))) ) ) } diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 179a32107..61df7864b 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -106,13 +106,13 @@ case class VRFParam( val vrfReadLatency = 2 } -class VRFProbe(regNumBits: Int, offsetBits: Int, instructionIndexSize: Int, dataPathWidth: Int) extends Bundle { +class VRFProbe(parameter: VRFParam) extends Bundle { val valid: Bool = Bool() - val requestVd: UInt = UInt(regNumBits.W) - val requestOffset: UInt = UInt(offsetBits.W) - val requestMask: UInt = UInt((dataPathWidth / 8).W) - val requestData: UInt = UInt(dataPathWidth.W) - val requestInstruction: UInt = UInt(instructionIndexSize.W) + val requestVd: UInt = UInt(parameter.regNumBits.W) + val requestOffset: UInt = UInt(parameter.vrfOffsetBits.W) + val requestMask: UInt = UInt((parameter.datapathWidth / 8).W) + val requestData: UInt = UInt(parameter.datapathWidth.W) + val requestInstruction: UInt = UInt(parameter.instructionIndexBits.W) } /** Vector Register File. 
@@ -564,8 +564,8 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar * Probe */ @public - val probe = IO(Output(Probe(new VRFProbe(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits, parameter.datapathWidth)))) - val probeWire = Wire(new VRFProbe(parameter.regNumBits, parameter.vrfOffsetBits, parameter.instructionIndexBits, parameter.datapathWidth)) + val probe = IO(Output(Probe(new VRFProbe(parameter)))) + val probeWire = Wire(new VRFProbe(parameter)) define(probe, ProbeValue(probeWire)) probeWire.valid := writePipe.valid From 7d0b99f24048221045e499cd8c6b8085dd7da6e6 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Tue, 6 Aug 2024 13:52:58 +0800 Subject: [PATCH 018/140] [ipemu] refactor to an single Probe to avoid using Record --- ipemu/src/TestBench.scala | 24 ++++++++---------------- t1/src/Lane.scala | 23 ++++++++--------------- t1/src/T1.scala | 26 ++++++-------------------- t1/src/lsu/LSU.scala | 4 ++-- t1/src/vrf/VRF.scala | 4 ++-- 5 files changed, 26 insertions(+), 55 deletions(-) diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index 21c3ad75a..c3c96b318 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -190,32 +190,24 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) // Events for difftest and performance modeling - val laneProbes = dut.io.laneProbes.zipWithIndex.map { - case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") - wire := probe.read(p) - wire + // Probes + val laneProbes = t1Probe.laneProbes.zipWithIndex.map { + case (lane, i) => lane.suggestName(s"lane${i}Probe") } - val lsuProbe = probe.read(dut.io.lsuProbe).suggestName("lsuProbe") + val lsuProbe = t1Probe.lsuProbe.suggestName("lsuProbe") val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") - val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map 
{ - case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") - wire := probe.read(p) - wire - } - // vrf write - laneVrfProbes.zipWithIndex.foreach { + laneProbes.zipWithIndex.foreach { case (lane, i) => - when(lane.valid)( + val vrf = lane.vrfProbe.suggestName(s"lane${i}VrfProbe") + when(vrf.valid)( printf( - cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + cf"""{"event":"VrfWrite","issue_idx":${vrf.requestInstruction},"vd":${vrf.requestVd},"offset":${vrf.requestOffset},"mask":"${vrf.requestMask}%x","data":"${vrf.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" ) ) } diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 49ddff3ee..59a7eb8a6 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -55,18 +55,15 @@ class LaneWriteProbe(instructionIndexBits: Int) extends Bundle { class LaneProbe(parameter: LaneParameter) extends Bundle { val slots = Vec(parameter.chainingSize, new LaneSlotProbe(parameter.instructionIndexBits)) - // @todo @Clo91eaf remove valid here, add stall := valid & !ready - val laneRequestValid: Bool = Bool() - // @todo remove it. - val laneRequestReady: Bool = Bool() + val laneRequestStall: Bool = Bool() // @todo @Clo91eaf change to occupied for each slot. 
val lastSlotOccupied: Bool = Bool() - // @todo replace it with VRFProbe - val vrfInstructionWriteReportReady: Bool = Bool() val instructionFinished: UInt = UInt(parameter.chainingSize.W) val instructionValid: UInt = UInt(parameter.chainingSize.W) val crossWriteProbe: Vec[ValidIO[LaneWriteProbe]] = Vec(2, Valid(new LaneWriteProbe(parameter.instructionIndexBits))) + + val vrfProbe: VRFProbe = new VRFProbe(parameter.vrfParam) } object LaneParameter { @@ -314,11 +311,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ val vrfReadyToStore: Bool = IO(Output(Bool())) @public - val probe: LaneProbe = IO(Output(Probe(new LaneProbe(parameter)))) - val probeWire: LaneProbe = Wire(new LaneProbe(parameter)) - define(probe, ProbeValue(probeWire)) - @public - val vrfProbe = IO(Output(Probe(new VRFProbe(parameter.vrfParam)))) + val laneProbe = IO(Output(Probe(new LaneProbe(parameter)))) + val probeWire = Wire(new LaneProbe(parameter)) + define(laneProbe, ProbeValue(probeWire)) @public val vrfAllocateIssue: Bool = IO(Output(Bool())) @@ -328,7 +323,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ /** VRF instantces. 
*/ val vrf: Instance[VRF] = Instantiate(new VRF(parameter.vrfParam)) - define(vrfProbe, vrf.probe) /** TODO: review later */ @@ -1217,10 +1211,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex // probe wire - probeWire.laneRequestValid := laneRequest.valid - probeWire.laneRequestReady := laneRequest.ready + probeWire.laneRequestStall := laneRequest.valid && !laneRequest.ready probeWire.lastSlotOccupied := slotOccupied.last - probeWire.vrfInstructionWriteReportReady := vrf.instructionWriteReport.ready probeWire.instructionFinished := instructionFinished probeWire.instructionValid := instructionValid probeWire.crossWriteProbe.zip(writeBusPort).foreach {case (pb, port) => @@ -1228,4 +1220,5 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ pb.bits.writeTag := port.deq.bits.instructionIndex pb.bits.writeMask := port.deq.bits.mask } + probeWire.vrfProbe := probe.read(vrf.vrfProbe) } diff --git a/t1/src/T1.scala b/t1/src/T1.scala index af110821c..5a36bb040 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -304,6 +304,9 @@ class T1Probe(parameter: T1Parameter) extends Bundle { val instructionValid: UInt = UInt((parameter.chainingSize * 2).W) // instruction index for check rd val responseCounter: UInt = UInt(parameter.instructionIndexBits.W) + // probes + val lsuProbe: LSUProbe = new LSUProbe(parameter.lsuParameters) + val laneProbes: Vec[LaneProbe] = Vec(parameter.laneNumber, new LaneProbe(parameter.laneParam)) } class T1Interface(parameter: T1Parameter) extends Record { @@ -314,11 +317,7 @@ class T1Interface(parameter: T1Parameter) extends Record { def highBandwidthLoadStorePort: AXI4RWIrrevocable = elements("highBandwidthLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def indexedLoadStorePort: AXI4RWIrrevocable = elements("indexedLoadStorePort").asInstanceOf[AXI4RWIrrevocable] def om: 
Property[ClassType] = elements("om").asInstanceOf[Property[ClassType]] - // TODO: refactor to an single Probe to avoid using Record on the [[T1Interface]]. - def lsuProbe: LSUProbe = elements("lsuProbe").asInstanceOf[LSUProbe] def t1Probe: T1Probe = elements("t1Probe").asInstanceOf[T1Probe] - def laneProbes: Seq[LaneProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}Probe").asInstanceOf[LaneProbe]) - def laneVrfProbes: Seq[VRFProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}VrfProbe").asInstanceOf[VRFProbe]) val elements: SeqMap[String, Data] = SeqMap.from( Seq( "clock" -> Input(Clock()), @@ -328,15 +327,8 @@ class T1Interface(parameter: T1Parameter) extends Record { "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), "om" -> Output(Property[AnyClassType]()), - "lsuProbe" -> Output(Probe(new LSUProbe(parameter.lsuParameters))), "t1Probe" -> Output(Probe(new T1Probe(parameter))), - ) ++ - Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.laneParam))) - ) ++ - Seq.tabulate(parameter.laneNumber)( - i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam))) - ) + ) ) } @@ -1583,15 +1575,7 @@ class T1(val parameter: T1Parameter) lane } - laneVec.zipWithIndex.foreach { case (lane, index) => - define(io.laneProbes(index), lane.probe) - define(io.laneVrfProbes(index), lane.vrfProbe) - } - omInstance.lanesIn := Property(laneVec.map(_.om.asAnyClassType)) - - define(io.lsuProbe, lsu._probe) - dataInWritePipeVec := VecInit(laneVec.map(_.writeQueueValid)) // 连lsu @@ -1745,6 +1729,8 @@ class T1(val parameter: T1Parameter) !slots.last.state.sMaskUnitExecution && !slots.last.state.idle, indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2)).asUInt probeWire.responseCounter := responseCounter + 
probeWire.laneProbes.zip(laneVec).foreach { case (p, l) => p := probe.read(l.laneProbe) } + probeWire.lsuProbe := probe.read(lsu.lsuProbe) // new V Request from core diff --git a/t1/src/lsu/LSU.scala b/t1/src/lsu/LSU.scala index a938973dc..9cb3b7d58 100644 --- a/t1/src/lsu/LSU.scala +++ b/t1/src/lsu/LSU.scala @@ -246,9 +246,9 @@ class LSU(param: LSUParameter) extends Module { ) @public - val _probe = IO(Output(Probe(new LSUProbe(param)))) + val lsuProbe = IO(Output(Probe(new LSUProbe(param)))) val probeWire = Wire(new LSUProbe(param)) - define(_probe, ProbeValue(probeWire)) + define(lsuProbe, ProbeValue(probeWire)) // read vrf val otherTryReadVrf: UInt = Mux(otherUnit.vrfReadDataPorts.valid, otherUnit.status.targetLane, 0.U) diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 61df7864b..724ef6372 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -564,9 +564,9 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar * Probe */ @public - val probe = IO(Output(Probe(new VRFProbe(parameter)))) + val vrfProbe = IO(Output(Probe(new VRFProbe(parameter)))) val probeWire = Wire(new VRFProbe(parameter)) - define(probe, ProbeValue(probeWire)) + define(vrfProbe, ProbeValue(probeWire)) probeWire.valid := writePipe.valid probeWire.requestVd := writePipe.bits.vd From 9fc31b85d8942f468a652107d43559ba4814a798 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 11:31:28 +0800 Subject: [PATCH 019/140] [build system] add rocketv build target --- build.sc | 19 ++++++++++++++++++- common.sc | 24 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/build.sc b/build.sc index 8e2cc5c3d..b1f0d35c0 100644 --- a/build.sc +++ b/build.sc @@ -35,7 +35,7 @@ trait Chisel object arithmetic extends Arithmetic -trait Arithmetic +trait Arithmetic extends millbuild.dependencies.arithmetic.common.ArithmeticModule { override def millSourcePath = os.pwd / "dependencies" / "arithmetic" / "arithmetic" def 
scalaVersion = T(v.scala) @@ -118,6 +118,23 @@ trait ConfigGen def mainargsIvy = v.mainargs } +object rocketv extends RocketV + +trait RocketV + extends millbuild.common.RocketVModule + with ScalafmtModule { + def scalaVersion = T(v.scala) + def rvdecoderdbModule = rvdecoderdb + def riscvOpcodesPath = T.input(PathRef(os.pwd / "dependencies" / "riscv-opcodes")) + def hardfloatModule = hardfloat + def axi4Module = axi4 + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + object ipemu extends IPEmulator trait IPEmulator diff --git a/common.sc b/common.sc index 21552589d..82786b560 100644 --- a/common.sc +++ b/common.sc @@ -70,6 +70,30 @@ trait ConfigGenModule override def ivyDeps = T(super.ivyDeps() ++ Seq(mainargsIvy)) } +// T1 forked version of RocketCore +trait RocketModule + extends ScalaModule + with HasChisel + with HasRVDecoderDB { + def rocketchipModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(rocketchipModule) +} + +// The next generation of purely standalone Rocket Core w/ AXI/CHI. 
+trait RocketVModule + extends ScalaModule + with HasChisel + with HasRVDecoderDB { + def axi4Module: ScalaModule + def hardfloatModule: ScalaModule + + def moduleDeps = super.moduleDeps ++ Seq(axi4Module, hardfloatModule) +} + +trait EmuHelperModule + extends ScalaModule + with HasChisel + trait IPEmulatorModule extends ScalaModule with HasChisel { From 8d6fa6241ff2777b0eb53cd381e005c27387b3f7 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 25 Jun 2024 17:40:25 +0800 Subject: [PATCH 020/140] [build system] implement the generic Elaborator trait --- elaborator/src/Elaborator.scala | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 elaborator/src/Elaborator.scala diff --git a/elaborator/src/Elaborator.scala b/elaborator/src/Elaborator.scala new file mode 100644 index 000000000..f4e080865 --- /dev/null +++ b/elaborator/src/Elaborator.scala @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator + +import chisel3.RawModule +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import mainargs.TokensReader + +import scala.reflect.runtime.universe +import scala.reflect.runtime.universe.{runtimeMirror, typeOf} + +// TODO: this will be upstreamed to Chisel +trait Elaborator { + implicit object PathRead extends TokensReader.Simple[os.Path] { + def shortName = "path" + def read(strs: Seq[String]) = Right(os.Path(strs.head, os.pwd)) + } + + def configImpl[P <: SerializableModuleParameter: universe.TypeTag]( + parameter: P + )(implicit rwP: upickle.default.Writer[P]) = os.write.over( + os.pwd / s"${getClass.getSimpleName.replace("$", "")}.json", + upickle.default.write(parameter) + ) + + def designImpl[ + M <: SerializableModule[P]: universe.TypeTag, + P <: SerializableModuleParameter: universe.TypeTag + ](parameter: os.Path, runFirtool: Boolean)(implicit + rwP: upickle.default.Reader[P] + ) 
= { + var fir: firrtl.ir.Circuit = null + val annos = Seq( + new chisel3.stage.phases.Elaborate, + new chisel3.stage.phases.Convert + ).foldLeft( + Seq( + chisel3.stage.ChiselGeneratorAnnotation(() => + SerializableModuleGenerator( + runtimeMirror(getClass.getClassLoader) + .runtimeClass(typeOf[M].typeSymbol.asClass) + .asInstanceOf[Class[M]], + upickle.default.read[P](os.read(parameter)) + ).module().asInstanceOf[RawModule] + ) + ): firrtl.AnnotationSeq + ) { case (annos, stage) => stage.transform(annos) } + .flatMap { + case firrtl.stage.FirrtlCircuitAnnotation(circuit) => + fir = circuit + None + case _: chisel3.stage.DesignAnnotation[_] => None + case _: chisel3.stage.ChiselCircuitAnnotation => None + case a => Some(a) + } + val annoJsonFile = os.pwd / s"${fir.main}.anno.json" + val firFile = os.pwd / s"${fir.main}.fir" + val svFile = os.pwd / s"${fir.main}.sv" + os.write.over(firFile, fir.serialize) + os.write.over( + annoJsonFile, + firrtl.annotations.JsonProtocol.serializeRecover(annos) + ) + if (runFirtool) { + os.proc( + "firtool", + s"--annotation-file=${annoJsonFile}", + s"${firFile}", + s"-o", + s"${svFile}", + "--strip-debug-info", + "--verification-flavor=sva", + "--extract-test-code" + ).call(os.pwd) + } + } +} From 7e59aa97b6dd314bba18fcd90dfcdb7abe1c4937 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 12:07:43 +0800 Subject: [PATCH 021/140] [build system] add rocketv to elaborator --- build.sc | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sc b/build.sc index b1f0d35c0..dd587489a 100644 --- a/build.sc +++ b/build.sc @@ -174,6 +174,7 @@ trait Elaborator def generators = Seq( t1, ipemu, + rocketv, ) def mainargsIvy = v.mainargs From 5e7e11cb7140bf17de3d3e2bb3cea813d7b00b67 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 12:10:29 +0800 Subject: [PATCH 022/140] [rocketv] copy ALU into rocketv project --- rocketv/src/ALU.scala | 119 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) 
create mode 100644 rocketv/src/ALU.scala diff --git a/rocketv/src/ALU.scala b/rocketv/src/ALU.scala new file mode 100644 index 000000000..ac17d4a9f --- /dev/null +++ b/rocketv/src/ALU.scala @@ -0,0 +1,119 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{BitPat, Cat, Fill, Reverse} +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.tile.CoreModule + +class ALUFN { + val SZ_ALU_FN = 4 + def FN_X = BitPat("b????") + def FN_ADD = 0.U + def FN_SL = 1.U + def FN_SEQ = 2.U + def FN_SNE = 3.U + def FN_XOR = 4.U + def FN_SR = 5.U + def FN_OR = 6.U + def FN_AND = 7.U + def FN_CZEQZ = 8.U + def FN_CZNEZ = 9.U + def FN_SUB = 10.U + def FN_SRA = 11.U + def FN_SLT = 12.U + def FN_SGE = 13.U + def FN_SLTU = 14.U + def FN_SGEU = 15.U + + // Mul/div reuse some integer FNs + def FN_DIV = FN_XOR + def FN_DIVU = FN_SR + def FN_REM = FN_OR + def FN_REMU = FN_AND + + def FN_MUL = FN_ADD + def FN_MULH = FN_SL + def FN_MULHSU = FN_SEQ + def FN_MULHU = FN_SNE + + def isMulFN(fn: UInt, cmp: UInt) = fn(1, 0) === cmp(1, 0) + def isSub(cmd: UInt) = cmd(3) + def isCmp(cmd: UInt) = cmd >= FN_SLT + def cmpUnsigned(cmd: UInt) = cmd(1) + def cmpInverted(cmd: UInt) = cmd(0) + def cmpEq(cmd: UInt) = !cmd(3) +} + +object ALUFN { + def apply() = new ALUFN +} + +abstract class AbstractALU[T <: ALUFN](val aluFn: T)(implicit p: Parameters) extends CoreModule()(p) { + val io = IO(new Bundle { + val dw = Input(UInt(SZ_DW.W)) + val fn = Input(UInt(aluFn.SZ_ALU_FN.W)) + val in2 = Input(UInt(xLen.W)) + val in1 = Input(UInt(xLen.W)) + val out = Output(UInt(xLen.W)) + val adder_out = Output(UInt(xLen.W)) + val cmp_out = Output(Bool()) + }) +} + +class ALU(implicit p: Parameters) extends AbstractALU(new ALUFN)(p) { + // ADD, SUB + val in2_inv = Mux(aluFn.isSub(io.fn), ~io.in2, io.in2) + val in1_xor_in2 = io.in1 ^ in2_inv + io.adder_out := io.in1 + in2_inv + 
aluFn.isSub(io.fn) + + // SLT, SLTU + val slt = + Mux( + io.in1(xLen - 1) === io.in2(xLen - 1), + io.adder_out(xLen - 1), + Mux(aluFn.cmpUnsigned(io.fn), io.in2(xLen - 1), io.in1(xLen - 1)) + ) + io.cmp_out := aluFn.cmpInverted(io.fn) ^ Mux(aluFn.cmpEq(io.fn), in1_xor_in2 === 0.U, slt) + + // SLL, SRL, SRA + val (shamt, shin_r) = + if (xLen == 32) (io.in2(4, 0), io.in1) + else { + require(xLen == 64) + val shin_hi_32 = Fill(32, aluFn.isSub(io.fn) && io.in1(31)) + val shin_hi = Mux(io.dw === DW_64, io.in1(63, 32), shin_hi_32) + val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4, 0)) + (shamt, Cat(shin_hi, io.in1(31, 0))) + } + val shin = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shin_r, Reverse(shin_r)) + val shout_r = (Cat(aluFn.isSub(io.fn) & shin(xLen - 1), shin).asSInt >> shamt)(xLen - 1, 0) + val shout_l = Reverse(shout_r) + val shout = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shout_r, 0.U) | + Mux(io.fn === aluFn.FN_SL, shout_l, 0.U) + + // CZEQZ, CZNEZ + val in2_not_zero = io.in2.orR + val cond_out = Option.when(usingConditionalZero)( + Mux((io.fn === aluFn.FN_CZEQZ && in2_not_zero) || (io.fn === aluFn.FN_CZNEZ && !in2_not_zero), io.in1, 0.U) + ) + + // AND, OR, XOR + val logic = Mux(io.fn === aluFn.FN_XOR || io.fn === aluFn.FN_OR, in1_xor_in2, 0.U) | + Mux(io.fn === aluFn.FN_OR || io.fn === aluFn.FN_AND, io.in1 & io.in2, 0.U) + + val shift_logic = (aluFn.isCmp(io.fn) && slt) | logic | shout + val shift_logic_cond = cond_out match { + case Some(co) => shift_logic | co + case _ => shift_logic + } + val out = Mux(io.fn === aluFn.FN_ADD || io.fn === aluFn.FN_SUB, io.adder_out, shift_logic_cond) + + io.out := out + if (xLen > 32) { + require(xLen == 64) + when(io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31, 0)) } + } +} From d297bc6ef8fdbd47079c2491c2d8a35c91899be8 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 12:24:31 +0800 Subject: [PATCH 023/140] [rocketv] migrate ALU --- rocketv/src/ALU.scala | 111 
++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 54 deletions(-) diff --git a/rocketv/src/ALU.scala b/rocketv/src/ALU.scala index ac17d4a9f..ecf34d50a 100644 --- a/rocketv/src/ALU.scala +++ b/rocketv/src/ALU.scala @@ -1,69 +1,72 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. - -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{BitPat, Cat, Fill, Reverse} -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.tile.CoreModule - -class ALUFN { - val SZ_ALU_FN = 4 - def FN_X = BitPat("b????") - def FN_ADD = 0.U - def FN_SL = 1.U - def FN_SEQ = 2.U - def FN_SNE = 3.U - def FN_XOR = 4.U - def FN_SR = 5.U - def FN_OR = 6.U - def FN_AND = 7.U - def FN_CZEQZ = 8.U - def FN_CZNEZ = 9.U - def FN_SUB = 10.U - def FN_SRA = 11.U - def FN_SLT = 12.U - def FN_SGE = 13.U - def FN_SLTU = 14.U - def FN_SGEU = 15.U - - // Mul/div reuse some integer FNs - def FN_DIV = FN_XOR - def FN_DIVU = FN_SR - def FN_REM = FN_OR - def FN_REMU = FN_AND - - def FN_MUL = FN_ADD - def FN_MULH = FN_SL - def FN_MULHSU = FN_SEQ - def FN_MULHU = FN_SNE - - def isMulFN(fn: UInt, cmp: UInt) = fn(1, 0) === cmp(1, 0) +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Fill, Reverse} + +object ALUParameter { + implicit def rwP: upickle.default.ReadWriter[ALUParameter] = upickle.default.macroRW[ALUParameter] +} + +case class ALUParameter(xLen: Int) extends SerializableModuleParameter { + val uopSize: Int = 4 + // static to false for now + val usingConditionalZero = false + + // TODO:move these to decoder. 
+ val FN_ADD = 0.U + val FN_SL = 1.U + val FN_SEQ = 2.U + val FN_SNE = 3.U + val FN_XOR = 4.U + val FN_SR = 5.U + val FN_OR = 6.U + val FN_AND = 7.U + val FN_CZEQZ = 8.U + val FN_CZNEZ = 9.U + val FN_SUB = 10.U + val FN_SRA = 11.U + val FN_SLT = 12.U + def isSub(cmd: UInt) = cmd(3) def isCmp(cmd: UInt) = cmd >= FN_SLT def cmpUnsigned(cmd: UInt) = cmd(1) def cmpInverted(cmd: UInt) = cmd(0) def cmpEq(cmd: UInt) = !cmd(3) -} -object ALUFN { - def apply() = new ALUFN + def DW_32 = false.B + def DW_64 = true.B } -abstract class AbstractALU[T <: ALUFN](val aluFn: T)(implicit p: Parameters) extends CoreModule()(p) { - val io = IO(new Bundle { - val dw = Input(UInt(SZ_DW.W)) - val fn = Input(UInt(aluFn.SZ_ALU_FN.W)) - val in2 = Input(UInt(xLen.W)) - val in1 = Input(UInt(xLen.W)) - val out = Output(UInt(xLen.W)) - val adder_out = Output(UInt(xLen.W)) - val cmp_out = Output(Bool()) - }) +class ALUInterface(parameter: ALUParameter) extends Bundle { + val dw = Input(UInt(1.W)) + val fn = Input(UInt(parameter.uopSize.W)) + val in2 = Input(UInt(parameter.xLen.W)) + val in1 = Input(UInt(parameter.xLen.W)) + val out = Output(UInt(parameter.xLen.W)) + val adder_out = Output(UInt(parameter.xLen.W)) + val cmp_out = Output(Bool()) } -class ALU(implicit p: Parameters) extends AbstractALU(new ALUFN)(p) { +@instantiable +class ALU(val parameter: ALUParameter) + extends FixedIORawModule(new ALUInterface(parameter)) + with SerializableModule[ALUParameter] { + // compatibility layer + val aluFn = parameter + val xLen = parameter.xLen + val DW_64 = parameter.DW_64 + val usingConditionalZero = parameter.usingConditionalZero + val DW_32 = parameter.DW_32 + + + // Original implementation + // ADD, SUB val in2_inv = Mux(aluFn.isSub(io.fn), ~io.in2, io.in2) val in1_xor_in2 = io.in1 ^ in2_inv From 0f3c47f177cb5bc8a670d12a4843c8eaa8ebd466 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 12:47:30 +0800 Subject: [PATCH 024/140] [rocketv] add elaborator for ALU - generate parameter 
json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.ALU config --xLen 32 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.ALU design --parameter ./ALU.json --run-firtool --- elaborator/src/rocketv/ALU.scala | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 elaborator/src/rocketv/ALU.scala diff --git a/elaborator/src/rocketv/ALU.scala b/elaborator/src/rocketv/ALU.scala new file mode 100644 index 000000000..05ad596b5 --- /dev/null +++ b/elaborator/src/rocketv/ALU.scala @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{ALU, ALUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object ALU extends Elaborator { + @main + case class ALUParameterMain( + @arg(name = "xLen") xLen: Int) { + def convert: ALUParameter = ALUParameter(xLen) + } + + implicit def ALUParameterMainParser: ParserForClass[ALUParameterMain] = ParserForClass[ALUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: ALUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[ALU, ALUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 6d691df5f5ead00727051051d63508d3bdfda501 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 13:02:12 +0800 Subject: [PATCH 025/140] [rocketv] copy AMOALU into rocketv project --- rocketv/src/AMOALU.scala | 109 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 rocketv/src/AMOALU.scala diff --git a/rocketv/src/AMOALU.scala b/rocketv/src/AMOALU.scala new file mode 100644 index 000000000..ec3a7be30 --- /dev/null +++ b/rocketv/src/AMOALU.scala @@ -0,0 
+1,109 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util._ +import org.chipsalliance.cde.config.Parameters + +class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) { + val size = Wire(UInt(log2Up(log2Up(maxSize) + 1).W)) + size := typ + def misaligned: Bool = + (addr & ((1.U << size) - 1.U)(log2Up(maxSize) - 1, 0)).orR + + def mask = { + var res = 1.U + for (i <- 0 until log2Up(maxSize)) { + val upper = Mux(addr(i), res, 0.U) | Mux(size >= (i + 1).U, ((BigInt(1) << (1 << i)) - 1).U, 0.U) + val lower = Mux(addr(i), 0.U, res) + res = Cat(upper, lower) + } + res + } + + protected def genData(i: Int): UInt = + if (i >= log2Up(maxSize)) dat + else Mux(size === i.U, Fill(1 << (log2Up(maxSize) - i), dat((8 << i) - 1, 0)), genData(i + 1)) + + def data = genData(0) + def wordData = genData(2) +} + +class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) { + private val size = new StoreGen(typ, addr, dat, maxSize).size + + private def genData(logMinSize: Int): UInt = { + var res = dat + for (i <- log2Up(maxSize) - 1 to logMinSize by -1) { + val pos = 8 << i + val shifted = Mux(addr(i), res(2 * pos - 1, pos), res(pos - 1, 0)) + val doZero = (i == 0).B && zero + val zeroed = Mux(doZero, 0.U, shifted) + res = Cat( + Mux(size === i.U || doZero, Fill(8 * maxSize - pos, signed && zeroed(pos - 1)), res(8 * maxSize - 1, pos)), + zeroed + ) + } + res + } + + def wordData = genData(2) + def data = genData(0) +} + +class AMOALU(operandBits: Int)(implicit p: Parameters) extends Module { + val minXLen = 32 + val widths = (0 to log2Ceil(operandBits / minXLen)).map(minXLen << _) + + val io = IO(new Bundle { + val mask = Input(UInt((operandBits / 8).W)) + val cmd = Input(UInt(M_SZ.W)) + val lhs = Input(UInt(operandBits.W)) + val rhs = Input(UInt(operandBits.W)) + val out = Output(UInt(operandBits.W)) + val out_unmasked = 
Output(UInt(operandBits.W)) + }) + + val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU + val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU + val add = io.cmd === M_XA_ADD + val logic_and = io.cmd === M_XA_OR || io.cmd === M_XA_AND + val logic_xor = io.cmd === M_XA_XOR || io.cmd === M_XA_OR + + val adder_out = { + // partition the carry chain to support sub-xLen addition + val mask = ~(0.U(operandBits.W) +: widths.init.map(w => !io.mask(w / 8 - 1) << (w - 1))).reduce(_ | _) + (io.lhs & mask) + (io.rhs & mask) + } + + val less = { + // break up the comparator so the lower parts will be CSE'd + def isLessUnsigned(x: UInt, y: UInt, n: Int): Bool = { + if (n == minXLen) x(n - 1, 0) < y(n - 1, 0) + else x(n - 1, n / 2) < y(n - 1, n / 2) || x(n - 1, n / 2) === y(n - 1, n / 2) && isLessUnsigned(x, y, n / 2) + } + + def isLess(x: UInt, y: UInt, n: Int): Bool = { + val signed = { + val mask = M_XA_MIN ^ M_XA_MINU + (io.cmd & mask) === (M_XA_MIN & mask) + } + Mux(x(n - 1) === y(n - 1), isLessUnsigned(x, y, n), Mux(signed, x(n - 1), y(n - 1))) + } + + PriorityMux(widths.reverse.map(w => (io.mask(w / 8 / 2), isLess(io.lhs, io.rhs, w)))) + } + + val minmax = Mux(Mux(less, min, max), io.lhs, io.rhs) + val logic = + Mux(logic_and, io.lhs & io.rhs, 0.U) | + Mux(logic_xor, io.lhs ^ io.rhs, 0.U) + val out = + Mux(add, adder_out, Mux(logic_and || logic_xor, logic, minmax)) + + val wmask = FillInterleaved(8, io.mask) + io.out := wmask & out | ~wmask & io.lhs + io.out_unmasked := out +} From 04a8eaf736ff10fe05d21875c9c09d07a7935c28 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 13:07:54 +0800 Subject: [PATCH 026/140] [rocketv] migrate AMOALU --- rocketv/src/AMOALU.scala | 98 +++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 56 deletions(-) diff --git a/rocketv/src/AMOALU.scala b/rocketv/src/AMOALU.scala index ec3a7be30..84e9ec5a8 100644 --- a/rocketv/src/AMOALU.scala +++ b/rocketv/src/AMOALU.scala @@ -1,70 +1,56 @@ -// See 
LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. - -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util._ -import org.chipsalliance.cde.config.Parameters - -class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) { - val size = Wire(UInt(log2Up(log2Up(maxSize) + 1).W)) - size := typ - def misaligned: Bool = - (addr & ((1.U << size) - 1.U)(log2Up(maxSize) - 1, 0)).orR - - def mask = { - var res = 1.U - for (i <- 0 until log2Up(maxSize)) { - val upper = Mux(addr(i), res, 0.U) | Mux(size >= (i + 1).U, ((BigInt(1) << (1 << i)) - 1).U, 0.U) - val lower = Mux(addr(i), 0.U, res) - res = Cat(upper, lower) - } - res - } +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{FillInterleaved, PriorityMux, log2Ceil} - protected def genData(i: Int): UInt = - if (i >= log2Up(maxSize)) dat - else Mux(size === i.U, Fill(1 << (log2Up(maxSize) - i), dat((8 << i) - 1, 0)), genData(i + 1)) - - def data = genData(0) - def wordData = genData(2) +object AMOALUParameter { + implicit def rwP: upickle.default.ReadWriter[AMOALUParameter] = upickle.default.macroRW[AMOALUParameter] } -class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) { - private val size = new StoreGen(typ, addr, dat, maxSize).size - - private def genData(logMinSize: Int): UInt = { - var res = dat - for (i <- log2Up(maxSize) - 1 to logMinSize by -1) { - val pos = 8 << i - val shifted = Mux(addr(i), res(2 * pos - 1, pos), res(pos - 1, 0)) - val doZero = (i == 0).B && zero - val zeroed = Mux(doZero, 0.U, shifted) - res = Cat( - Mux(size === i.U || doZero, Fill(8 * maxSize - pos, 
signed && zeroed(pos - 1)), res(8 * maxSize - 1, pos)), - zeroed - ) - } - res - } +case class AMOALUParameter(operandBits: Int) extends SerializableModuleParameter { + val uopSize: Int = 4 + def M_XA_ADD = "b01000".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U +} - def wordData = genData(2) - def data = genData(0) +class AMOALUInterface(parameter: AMOALUParameter) extends Bundle { + val mask = Input(UInt((parameter.operandBits / 8).W)) + val cmd = Input(UInt(parameter.uopSize.W)) + val lhs = Input(UInt(parameter.operandBits.W)) + val rhs = Input(UInt(parameter.operandBits.W)) + val out = Output(UInt(parameter.operandBits.W)) + val out_unmasked = Output(UInt(parameter.operandBits.W)) } -class AMOALU(operandBits: Int)(implicit p: Parameters) extends Module { +@instantiable +class AMOALU(val parameter: AMOALUParameter) + extends FixedIORawModule(new AMOALUInterface(parameter)) + with SerializableModule[AMOALUParameter] { + val M_XA_MAX = parameter.M_XA_MAX + val M_XA_MAXU = parameter.M_XA_MAXU + val M_XA_MIN = parameter.M_XA_MIN + val M_XA_MINU = parameter.M_XA_MINU + val M_XA_ADD = parameter.M_XA_ADD + val M_XA_OR = parameter.M_XA_OR + val M_XA_AND = parameter.M_XA_AND + val M_XA_XOR = parameter.M_XA_XOR + val operandBits = parameter.operandBits val minXLen = 32 val widths = (0 to log2Ceil(operandBits / minXLen)).map(minXLen << _) - val io = IO(new Bundle { - val mask = Input(UInt((operandBits / 8).W)) - val cmd = Input(UInt(M_SZ.W)) - val lhs = Input(UInt(operandBits.W)) - val rhs = Input(UInt(operandBits.W)) - val out = Output(UInt(operandBits.W)) - val out_unmasked = Output(UInt(operandBits.W)) - }) + // Original implementation val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU From eec1d9f7c65ad4cb4023fd3099e2021310de4f6d Mon Sep 17 00:00:00 2001 From: 
Jiuyang Liu Date: Thu, 27 Jun 2024 13:13:47 +0800 Subject: [PATCH 027/140] [rocketv] add elaborator for AMOALU - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.AMOALU config --operandBits 32 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.AMOALU design --parameter ./AMOALU.json --run-firtool --- elaborator/src/rocketv/AMOALU.scala | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 elaborator/src/rocketv/AMOALU.scala diff --git a/elaborator/src/rocketv/AMOALU.scala b/elaborator/src/rocketv/AMOALU.scala new file mode 100644 index 000000000..148936db5 --- /dev/null +++ b/elaborator/src/rocketv/AMOALU.scala @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{AMOALU, AMOALUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object AMOALU extends Elaborator { + @main + case class AMOALUParameterMain( + @arg(name = "operandBits") operandBits: Int) { + def convert: AMOALUParameter = AMOALUParameter(operandBits) + } + + implicit def AMOALUParameterMainParser: ParserForClass[AMOALUParameterMain] = ParserForClass[AMOALUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: AMOALUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[AMOALU, AMOALUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From f32aa79f44dd99ff133bbe354f49944b7140524c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 13:21:30 +0800 Subject: [PATCH 028/140] [rocketv] copy Breakpoint into rocketv project --- rocketv/src/Breakpoint.scala | 133 +++++++++++++++++++++++++++++++++++ 1 file changed, 133 
insertions(+) create mode 100644 rocketv/src/Breakpoint.scala diff --git a/rocketv/src/Breakpoint.scala b/rocketv/src/Breakpoint.scala new file mode 100644 index 000000000..51a683a8f --- /dev/null +++ b/rocketv/src/Breakpoint.scala @@ -0,0 +1,133 @@ +// See LICENSE.SiFive for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{Cat} +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.tile.{CoreBundle, HasCoreParameters} +import freechips.rocketchip.util._ + +class BPControl(implicit p: Parameters) extends CoreBundle()(p) { + val ttype = UInt(4.W) + val dmode = Bool() + val maskmax = UInt(6.W) + val reserved = UInt((xLen - (if (coreParams.useBPWatch) 26 else 24)).W) + val action = UInt((if (coreParams.useBPWatch) 3 else 1).W) + val chain = Bool() + val zero = UInt(2.W) + val tmatch = UInt(2.W) + val m = Bool() + val h = Bool() + val s = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() + + def tType = 2 + def maskMax = 4 + def enabled(mstatus: MStatus) = !mstatus.debug && Cat(m, h, s, u)(mstatus.prv) +} + +class TExtra(implicit p: Parameters) extends CoreBundle()(p) { + def mvalueBits: Int = if (xLen == 32) coreParams.mcontextWidth.min(6) else coreParams.mcontextWidth.min(13) + def svalueBits: Int = if (xLen == 32) coreParams.scontextWidth.min(16) else coreParams.scontextWidth.min(34) + def mselectPos: Int = if (xLen == 32) 25 else 50 + def mvaluePos: Int = mselectPos + 1 + def sselectPos: Int = 0 + def svaluePos: Int = 2 + + val mvalue = UInt(mvalueBits.W) + val mselect = Bool() + val pad2 = UInt((mselectPos - svalueBits - 2).W) + val svalue = UInt(svalueBits.W) + val pad1 = UInt(1.W) + val sselect = Bool() +} + +class BP(implicit p: Parameters) extends CoreBundle()(p) { + val control = new BPControl + val address = UInt(vaddrBits.W) + val textra = new TExtra + + def contextMatch(mcontext: UInt, scontext: UInt) = + (if (coreParams.mcontextWidth > 0) (!textra.mselect 
|| (mcontext(textra.mvalueBits - 1, 0) === textra.mvalue)) + else true.B) && + (if (coreParams.scontextWidth > 0) (!textra.sselect || (scontext(textra.svalueBits - 1, 0) === textra.svalue)) + else true.B) + + def mask(dummy: Int = 0) = + (0 until control.maskMax - 1).scanLeft(control.tmatch(0))((m, i) => m && address(i)).asUInt + + def pow2AddressMatch(x: UInt) = + (~x | mask()) === (~address | mask()) + + def rangeAddressMatch(x: UInt) = + (x >= address) ^ control.tmatch(0) + + def addressMatch(x: UInt) = + Mux(control.tmatch(1), rangeAddressMatch(x), pow2AddressMatch(x)) +} + +class BPWatch(val n: Int) extends Bundle() { + val valid = Vec(n, Bool()) + val rvalid = Vec(n, Bool()) + val wvalid = Vec(n, Bool()) + val ivalid = Vec(n, Bool()) + val action = UInt(3.W) +} + +class BreakpointUnit(n: Int)(implicit val p: Parameters) extends Module with HasCoreParameters { + val io = IO(new Bundle { + val status = Input(new MStatus()) + val bp = Input(Vec(n, new BP)) + val pc = Input(UInt(vaddrBits.W)) + val ea = Input(UInt(vaddrBits.W)) + val mcontext = Input(UInt(coreParams.mcontextWidth.W)) + val scontext = Input(UInt(coreParams.scontextWidth.W)) + val xcpt_if = Output(Bool()) + val xcpt_ld = Output(Bool()) + val xcpt_st = Output(Bool()) + val debug_if = Output(Bool()) + val debug_ld = Output(Bool()) + val debug_st = Output(Bool()) + val bpwatch = Output(Vec(n, new BPWatch(1))) + }) + + io.xcpt_if := false.B + io.xcpt_ld := false.B + io.xcpt_st := false.B + io.debug_if := false.B + io.debug_ld := false.B + io.debug_st := false.B + + (io.bpwatch.zip(io.bp)).foldLeft((true.B, true.B, true.B)) { + case ((ri, wi, xi), (bpw, bp)) => + val en = bp.control.enabled(io.status) + val cx = bp.contextMatch(io.mcontext, io.scontext) + val r = en && bp.control.r && bp.addressMatch(io.ea) && cx + val w = en && bp.control.w && bp.addressMatch(io.ea) && cx + val x = en && bp.control.x && bp.addressMatch(io.pc) && cx + val end = !bp.control.chain + val action = bp.control.action + + 
bpw.action := action + bpw.valid(0) := false.B + bpw.rvalid(0) := false.B + bpw.wvalid(0) := false.B + bpw.ivalid(0) := false.B + + when(end && r && ri) { + io.xcpt_ld := (action === 0.U); io.debug_ld := (action === 1.U); bpw.valid(0) := true.B; bpw.rvalid(0) := true.B + } + when(end && w && wi) { + io.xcpt_st := (action === 0.U); io.debug_st := (action === 1.U); bpw.valid(0) := true.B; bpw.wvalid(0) := true.B + } + when(end && x && xi) { + io.xcpt_if := (action === 0.U); io.debug_if := (action === 1.U); bpw.valid(0) := true.B; bpw.ivalid(0) := true.B + } + + (end || r, end || w, end || x) + } +} From ad9ebf3c2125378f631d093eec84a609d201ee96 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 13:46:37 +0800 Subject: [PATCH 029/140] [rocketv] migrate BreakpointUnit --- rocketv/src/Breakpoint.scala | 142 ++++++++++----------------------- rocketv/src/Bundle.scala | 148 +++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+), 102 deletions(-) create mode 100644 rocketv/src/Bundle.scala diff --git a/rocketv/src/Breakpoint.scala b/rocketv/src/Breakpoint.scala index 51a683a8f..e55a28d64 100644 --- a/rocketv/src/Breakpoint.scala +++ b/rocketv/src/Breakpoint.scala @@ -1,100 +1,38 @@ -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{Cat} -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.tile.{CoreBundle, HasCoreParameters} -import freechips.rocketchip.util._ - -class BPControl(implicit p: Parameters) extends CoreBundle()(p) { - val ttype = UInt(4.W) - val dmode = Bool() - val maskmax = UInt(6.W) - val reserved = UInt((xLen - (if (coreParams.useBPWatch) 26 else 24)).W) - val action = UInt((if (coreParams.useBPWatch) 3 else 1).W) - val chain = Bool() - val zero = UInt(2.W) - val tmatch = UInt(2.W) - val m = Bool() - val h = Bool() - val s = Bool() - val u = Bool() - val x = Bool() - val w = Bool() - val r = Bool() +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} - def tType = 2 - def maskMax = 4 - def enabled(mstatus: MStatus) = !mstatus.debug && Cat(m, h, s, u)(mstatus.prv) +object BreakpointUnitParameter { + implicit def rwP: upickle.default.ReadWriter[BreakpointUnitParameter] = upickle.default.macroRW[BreakpointUnitParameter] } -class TExtra(implicit p: Parameters) extends CoreBundle()(p) { - def mvalueBits: Int = if (xLen == 32) coreParams.mcontextWidth.min(6) else coreParams.mcontextWidth.min(13) - def svalueBits: Int = if (xLen == 32) coreParams.scontextWidth.min(16) else coreParams.scontextWidth.min(34) - def mselectPos: Int = if (xLen == 32) 25 else 50 - def mvaluePos: Int = mselectPos + 1 - def sselectPos: Int = 0 - def svaluePos: Int = 2 - - val mvalue = UInt(mvalueBits.W) - val mselect = Bool() - val pad2 = UInt((mselectPos - svalueBits - 2).W) - val svalue = UInt(svalueBits.W) - val pad1 = UInt(1.W) - val sselect = Bool() +case class BreakpointUnitParameter(nBreakpoints: Int, xLen: Int, useBPWatch: Boolean, vaddrBits: Int, 
mcontextWidth: Int, scontextWidth: Int) extends SerializableModuleParameter + +class BreakpointUnitInterface(parameter: BreakpointUnitParameter) extends Bundle { + val status = Input(new MStatus) + val bp = Input(Vec(parameter.nBreakpoints, new BP(parameter.xLen, parameter.useBPWatch, parameter.vaddrBits, parameter.mcontextWidth, parameter.scontextWidth))) + val pc = Input(UInt(parameter.vaddrBits.W)) + val ea = Input(UInt(parameter.vaddrBits.W)) + val mcontext = Input(UInt(parameter.mcontextWidth.W)) + val scontext = Input(UInt(parameter.scontextWidth.W)) + val xcpt_if = Output(Bool()) + val xcpt_ld = Output(Bool()) + val xcpt_st = Output(Bool()) + val debug_if = Output(Bool()) + val debug_ld = Output(Bool()) + val debug_st = Output(Bool()) + val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) } -class BP(implicit p: Parameters) extends CoreBundle()(p) { - val control = new BPControl - val address = UInt(vaddrBits.W) - val textra = new TExtra - - def contextMatch(mcontext: UInt, scontext: UInt) = - (if (coreParams.mcontextWidth > 0) (!textra.mselect || (mcontext(textra.mvalueBits - 1, 0) === textra.mvalue)) - else true.B) && - (if (coreParams.scontextWidth > 0) (!textra.sselect || (scontext(textra.svalueBits - 1, 0) === textra.svalue)) - else true.B) - - def mask(dummy: Int = 0) = - (0 until control.maskMax - 1).scanLeft(control.tmatch(0))((m, i) => m && address(i)).asUInt - - def pow2AddressMatch(x: UInt) = - (~x | mask()) === (~address | mask()) - - def rangeAddressMatch(x: UInt) = - (x >= address) ^ control.tmatch(0) - - def addressMatch(x: UInt) = - Mux(control.tmatch(1), rangeAddressMatch(x), pow2AddressMatch(x)) -} - -class BPWatch(val n: Int) extends Bundle() { - val valid = Vec(n, Bool()) - val rvalid = Vec(n, Bool()) - val wvalid = Vec(n, Bool()) - val ivalid = Vec(n, Bool()) - val action = UInt(3.W) -} - -class BreakpointUnit(n: Int)(implicit val p: Parameters) extends Module with HasCoreParameters { - val io = IO(new Bundle { - val status = 
Input(new MStatus()) - val bp = Input(Vec(n, new BP)) - val pc = Input(UInt(vaddrBits.W)) - val ea = Input(UInt(vaddrBits.W)) - val mcontext = Input(UInt(coreParams.mcontextWidth.W)) - val scontext = Input(UInt(coreParams.scontextWidth.W)) - val xcpt_if = Output(Bool()) - val xcpt_ld = Output(Bool()) - val xcpt_st = Output(Bool()) - val debug_if = Output(Bool()) - val debug_ld = Output(Bool()) - val debug_st = Output(Bool()) - val bpwatch = Output(Vec(n, new BPWatch(1))) - }) - +@instantiable +class BreakpointUnit(val parameter: BreakpointUnitParameter) + extends FixedIORawModule(new BreakpointUnitInterface(parameter)) + with SerializableModule[BreakpointUnitParameter] { io.xcpt_if := false.B io.xcpt_ld := false.B io.xcpt_st := false.B @@ -104,28 +42,28 @@ class BreakpointUnit(n: Int)(implicit val p: Parameters) extends Module with Has (io.bpwatch.zip(io.bp)).foldLeft((true.B, true.B, true.B)) { case ((ri, wi, xi), (bpw, bp)) => - val en = bp.control.enabled(io.status) - val cx = bp.contextMatch(io.mcontext, io.scontext) - val r = en && bp.control.r && bp.addressMatch(io.ea) && cx - val w = en && bp.control.w && bp.addressMatch(io.ea) && cx - val x = en && bp.control.x && bp.addressMatch(io.pc) && cx + val en = BPControl.enabled(bp.control, io.status) + val cx = BP.contextMatch(bp, io.mcontext, io.scontext, parameter.xLen, parameter.mcontextWidth, parameter.scontextWidth) + val r = en && bp.control.r && BP.addressMatch(bp, io.ea) && cx + val w = en && bp.control.w && BP.addressMatch(bp, io.ea) && cx + val x = en && bp.control.x && BP.addressMatch(bp, io.pc) && cx val end = !bp.control.chain val action = bp.control.action bpw.action := action - bpw.valid(0) := false.B - bpw.rvalid(0) := false.B - bpw.wvalid(0) := false.B - bpw.ivalid(0) := false.B + bpw.valid := false.B + bpw.rvalid := false.B + bpw.wvalid := false.B + bpw.ivalid := false.B when(end && r && ri) { - io.xcpt_ld := (action === 0.U); io.debug_ld := (action === 1.U); bpw.valid(0) := true.B; bpw.rvalid(0) 
:= true.B + io.xcpt_ld := (action === 0.U); io.debug_ld := (action === 1.U); bpw.valid := true.B; bpw.rvalid := true.B } when(end && w && wi) { - io.xcpt_st := (action === 0.U); io.debug_st := (action === 1.U); bpw.valid(0) := true.B; bpw.wvalid(0) := true.B + io.xcpt_st := (action === 0.U); io.debug_st := (action === 1.U); bpw.valid := true.B; bpw.wvalid := true.B } when(end && x && xi) { - io.xcpt_if := (action === 0.U); io.debug_if := (action === 1.U); bpw.valid(0) := true.B; bpw.ivalid(0) := true.B + io.xcpt_if := (action === 0.U); io.debug_if := (action === 1.U); bpw.valid := true.B; bpw.ivalid := true.B } (end || r, end || w, end || x) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala new file mode 100644 index 000000000..c1724c2e9 --- /dev/null +++ b/rocketv/src/Bundle.scala @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.Cat + +// This file defines Bundle shared in the project. +// all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. 
+ +object MStatus { + object PRV { + val SZ = 2 + val U = 0 + val S = 1 + val H = 2 + val M = 3 + } +} + +class MStatus extends Bundle { + import MStatus._ + // not truly part of mstatus, but convenient + val debug = Bool() + val cease = Bool() + val wfi = Bool() + val isa = UInt(32.W) + + val dprv = UInt(PRV.SZ.W) // effective prv for data accesses + val dv = Bool() // effective v for data accesses + val prv = UInt(PRV.SZ.W) + val v = Bool() + + val sd = Bool() + val zero2 = UInt(23.W) + val mpv = Bool() + val gva = Bool() + val mbe = Bool() + val sbe = Bool() + val sxl = UInt(2.W) + val uxl = UInt(2.W) + val sd_rv32 = Bool() + val zero1 = UInt(8.W) + val tsr = Bool() + val tw = Bool() + val tvm = Bool() + val mxr = Bool() + val sum = Bool() + val mprv = Bool() + val xs = UInt(2.W) + val fs = UInt(2.W) + val mpp = UInt(2.W) + val vs = UInt(2.W) + val spp = UInt(1.W) + val mpie = Bool() + val ube = Bool() + val spie = Bool() + val upie = Bool() + val mie = Bool() + val hie = Bool() + val sie = Bool() + val uie = Bool() +} + +object BP { + def contextMatch(bp: BP, mcontext: UInt, scontext: UInt, xLen: Int, mcontextWidth: Int, scontextWidth: Int): Bool = + (if (mcontextWidth > 0) + !bp.textra.mselect || (mcontext(TExtra.mvalueBits(xLen, mcontextWidth) - 1, 0) === bp.textra.mvalue) + else true.B) && + (if (scontextWidth > 0) + !bp.textra.sselect || (scontext(TExtra.svalueBits(xLen, scontextWidth) - 1, 0) === bp.textra.svalue) + else true.B + ) + + def addressMatch(bp: BP, x: UInt) = { + def rangeAddressMatch(x: UInt) = + (x >= bp.address) ^ bp.control.tmatch(0) + + def pow2AddressMatch(x: UInt): Bool = { + def mask(): UInt = { + import chisel3.experimental.conversions.seq2vec + def maskMax = 4 + (0 until maskMax - 1).scanLeft(bp.control.tmatch(0))((m, i) => m && bp.address(i)).asUInt + } + (~x | mask()) === (~bp.address | mask()) + } + Mux(bp.control.tmatch(1), rangeAddressMatch(x), pow2AddressMatch(x)) + } +} + +class BP(xLen: Int, useBPWatch: Boolean, vaddrBits: 
Int, mcontextWidth: Int, scontextWidth: Int) extends Bundle { + val control = new BPControl(xLen, useBPWatch) + val address = UInt(vaddrBits.W) + val textra = new TExtra(xLen, mcontextWidth, scontextWidth) +} + +object BPControl { + def enabled(bpControl: BPControl, mstatus: MStatus): Bool = !mstatus.debug && Cat(bpControl.m, bpControl.h, bpControl.s, bpControl.u)(mstatus.prv) +} + +class BPControl(xLen: Int, useBPWatch: Boolean) extends Bundle { + val ttype = UInt(4.W) + val dmode = Bool() + val maskmax = UInt(6.W) + val reserved = UInt((xLen - (if (useBPWatch) 26 else 24)).W) + val action = UInt((if (useBPWatch) 3 else 1).W) + val chain = Bool() + val zero = UInt(2.W) + val tmatch = UInt(2.W) + val m = Bool() + val h = Bool() + val s = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() +} + +object TExtra { + def mvalueBits(xLen: Int, mcontextWidth: Int): Int = if (xLen == 32) mcontextWidth.min(6) else mcontextWidth.min(13) + def svalueBits(xLen: Int, scontextWidth: Int): Int = if (xLen == 32) scontextWidth.min(16) else scontextWidth.min(34) + def mselectPos(xLen: Int): Int = if (xLen == 32) 25 else 50 + def mvaluePos(xLen: Int): Int = mselectPos(xLen) + 1 + def sselectPos: Int = 0 + def svaluePos: Int = 2 +} + +class TExtra(xLen: Int, mcontextWidth: Int, scontextWidth: Int) extends Bundle { + import TExtra._ + val mvalue = UInt(mvalueBits(xLen, mcontextWidth).W) + val mselect = Bool() + val pad2 = UInt((mselectPos(xLen) - svalueBits(xLen, scontextWidth) - 2).W) + val svalue = UInt(svalueBits(xLen, scontextWidth).W) + val pad1 = UInt(1.W) + val sselect = Bool() +} + +// originally in RocketChip, there is (n: Int) as parameter. this is designed for retire width, +// since Rocket is a single issue core, we removed it. 
+class BPWatch extends Bundle() { + val valid = Bool() + val rvalid = Bool() + val wvalid = Bool() + val ivalid = Bool() + val action = UInt(3.W) +} \ No newline at end of file From 3908c85ad120036b8dafe78378ba097ea8f4a160 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 13:54:56 +0800 Subject: [PATCH 030/140] [rocketv] add elaborator for BreakpointUnit - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.BreakpointUnit config --nBreakpoints 4 --xLen 32 --useBPWatch true --vaddrBits 32 --mcontextWidth 0 --scontextWidth 0 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.BreakpointUnit design --parameter ./BreakpointUnit.json --run-firtool --- elaborator/src/rocketv/BreakpointUnit.scala | 33 +++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 elaborator/src/rocketv/BreakpointUnit.scala diff --git a/elaborator/src/rocketv/BreakpointUnit.scala b/elaborator/src/rocketv/BreakpointUnit.scala new file mode 100644 index 000000000..445f1a2e7 --- /dev/null +++ b/elaborator/src/rocketv/BreakpointUnit.scala @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{BreakpointUnit, BreakpointUnitParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object BreakpointUnit extends Elaborator { + @main + case class BreakpointUnitParameterMain( + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int) { + def convert: BreakpointUnitParameter = + BreakpointUnitParameter(nBreakpoints, xLen, useBPWatch, vaddrBits, mcontextWidth, scontextWidth) + } + + implicit def BreakpointUnitParameterMainParser: 
ParserForClass[BreakpointUnitParameterMain] = + ParserForClass[BreakpointUnitParameterMain] + + @main + def config(@arg(name = "parameter") parameter: BreakpointUnitParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[BreakpointUnit, BreakpointUnitParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 06701a7564da5ed097e4e17f4da4898f3671c359 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 14:13:52 +0800 Subject: [PATCH 031/140] [rocketv] copy BTB into rocketv project --- rocketv/src/BTB.scala | 339 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 rocketv/src/BTB.scala diff --git a/rocketv/src/BTB.scala b/rocketv/src/BTB.scala new file mode 100644 index 000000000..9b45b41fa --- /dev/null +++ b/rocketv/src/BTB.scala @@ -0,0 +1,339 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util._ +import freechips.rocketchip.rocket.BHTParams +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.subsystem.CacheBlockBytes +import freechips.rocketchip.tile.HasCoreParameters +import freechips.rocketchip.util._ +// TODO: Get rid of it. +import freechips.rocketchip.rocket.BTBParams + +trait HasBtbParameters extends HasCoreParameters { + // damn... 
tile deps rocketcore + val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0)) + val matchBits = btbParams.nMatchBits.max(log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets)) + val entries = btbParams.nEntries + val updatesOutOfOrder = btbParams.updatesOutOfOrder + val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages +} + +abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters { + Annotated.params(this, btbParams) +} + +abstract class BtbBundle(implicit val p: Parameters) extends Bundle with HasBtbParameters + +class RAS(nras: Int) { + def push(addr: UInt): Unit = { + when(count < nras.U) { count := count + 1.U } + val nextPos = Mux((isPow2(nras)).B || pos < (nras - 1).U, pos + 1.U, 0.U) + stack(nextPos) := addr + pos := nextPos + } + def peek: UInt = stack(pos) + def pop(): Unit = when(!isEmpty) { + count := count - 1.U + pos := Mux((isPow2(nras)).B || pos > 0.U, pos - 1.U, (nras - 1).U) + } + def clear(): Unit = count := 0.U + def isEmpty: Bool = count === 0.U + + private val count = RegInit(0.U(log2Up(nras + 1).W)) + private val pos = RegInit(0.U(log2Up(nras).W)) + private val stack = Reg(Vec(nras, UInt())) +} + +class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { + val history = UInt(btbParams.bhtParams.map(_.historyLength).getOrElse(1).W) + val value = UInt(btbParams.bhtParams.map(_.counterLength).getOrElse(1).W) + def taken = value(0) + def strongly_taken = value === 1.U +} + +// BHT contains table of 2-bit counters and a global history register. +// The BHT only predicts and updates when there is a BTB hit. +// The global history: +// - updated speculatively in fetch (if there's a BTB hit). +// - on a mispredict, the history register is reset (again, only if BTB hit). +// The counter table: +// - each counter corresponds with the address of the fetch packet ("fetch pc"). +// - updated when a branch resolves (and BTB was a hit for that branch). 
+// The updating branch must provide its "fetch pc". +class BHT(params: BHTParams)(implicit val p: Parameters) extends HasCoreParameters { + def index(addr: UInt, history: UInt) = { + def hashHistory(hist: UInt) = if (params.historyLength == params.historyBits) hist + else { + val k = math.sqrt(3) / 2 + val i = BigDecimal(k * math.pow(2, params.historyLength)).toBigInt + (i.U * hist)(params.historyLength - 1, params.historyLength - params.historyBits) + } + def hashAddr(addr: UInt) = { + val hi = addr >> log2Ceil(fetchBytes) + hi(log2Ceil(params.nEntries) - 1, 0) ^ (hi >> log2Ceil(params.nEntries))(1, 0) + } + hashAddr(addr) ^ (hashHistory(history) << (log2Up(params.nEntries) - params.historyBits)) + } + def get(addr: UInt): BHTResp = { + val res = Wire(new BHTResp) + res.value := Mux(resetting, 0.U, table(index(addr, history))) + res.history := history + res + } + def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { + wen := true.B + when(!resetting) { + waddr := index(addr, d.history) + wdata := (params.counterLength match { + case 1 => taken + case 2 => Cat(taken ^ d.value(0), d.value === 1.U || d.value(1) && taken) + }) + } + } + def resetHistory(d: BHTResp): Unit = { + history := d.history + } + def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { + history := Cat(taken, d.history >> 1) + } + def advanceHistory(taken: Bool): Unit = { + history := Cat(taken, history >> 1) + } + + private val table = Mem(params.nEntries, UInt(params.counterLength.W)) + val history = RegInit(0.U(params.historyLength.W)) + + private val reset_waddr = RegInit(0.U((params.nEntries.log2 + 1).W)) + private val resetting = !reset_waddr(params.nEntries.log2) + private val wen = WireInit(resetting) + private val waddr = WireInit(reset_waddr) + private val wdata = WireInit(0.U) + when(resetting) { reset_waddr := reset_waddr + 1.U } + when(wen) { table(waddr) := wdata } +} + +object CFIType { + def SZ = 2 + def apply() = UInt(SZ.W) + def branch = 0.U + def jump = 1.U 
+ def call = 2.U + def ret = 3.U +} + +// BTB update occurs during branch resolution (and only on a mispredict). +// - "pc" is what future fetch PCs will tag match against. +// - "br_pc" is the PC of the branch instruction. +class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { + val prediction = new BTBResp + val pc = UInt(vaddrBits.W) + val target = UInt(vaddrBits.W) + val taken = Bool() + val isValid = Bool() + val br_pc = UInt(vaddrBits.W) + val cfiType = CFIType() +} + +// BHT update occurs during branch resolution on all conditional branches. +// - "pc" is what future fetch PCs will tag match against. +class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { + val prediction = new BHTResp + val pc = UInt(vaddrBits.W) + val branch = Bool() + val taken = Bool() + val mispredict = Bool() +} + +class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { + val cfiType = CFIType() + val returnAddr = UInt(vaddrBits.W) +} + +// - "bridx" is the low-order PC bits of the predicted branch (after +// shifting off the lowest log(inst_bytes) bits off). +// - "mask" provides a mask of valid instructions (instructions are +// masked off by the predicted taken branch from the BTB). +class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { + val cfiType = CFIType() + val taken = Bool() + val mask = Bits(fetchWidth.W) + val bridx = Bits(log2Up(fetchWidth).W) + val target = UInt(vaddrBits.W) + val entry = UInt(log2Up(entries + 1).W) + val bht = new BHTResp +} + +class BTBReq(implicit p: Parameters) extends BtbBundle()(p) { + val addr = UInt(vaddrBits.W) +} + +// fully-associative branch target buffer +// Higher-performance processors may cause BTB updates to occur out-of-order, +// which requires an extra CAM port for updates (to ensure no duplicates get +// placed in BTB). 
+class BTB(implicit p: Parameters) extends BtbModule { + val io = IO(new Bundle { + val req = Flipped(Valid(new BTBReq)) + val resp = Valid(new BTBResp) + val btb_update = Flipped(Valid(new BTBUpdate)) + val bht_update = Flipped(Valid(new BHTUpdate)) + val bht_advance = Flipped(Valid(new BTBResp)) + val ras_update = Flipped(Valid(new RASUpdate)) + val ras_head = Valid(UInt(vaddrBits.W)) + val flush = Input(Bool()) + }) + + val idxs = Reg(Vec(entries, UInt((matchBits - log2Up(coreInstBytes)).W))) + val idxPages = Reg(Vec(entries, UInt(log2Up(nPages).W))) + val tgts = Reg(Vec(entries, UInt((matchBits - log2Up(coreInstBytes)).W))) + val tgtPages = Reg(Vec(entries, UInt(log2Up(nPages).W))) + val pages = Reg(Vec(nPages, UInt((vaddrBits - matchBits).W))) + val pageValid = RegInit(0.U(nPages.W)) + val pagesMasked = (pageValid.asBools.zip(pages)).map { case (v, p) => Mux(v, p, 0.U) } + + val isValid = RegInit(0.U(entries.W)) + val cfiType = Reg(Vec(entries, CFIType())) + val brIdx = Reg(Vec(entries, UInt(log2Up(fetchWidth).W))) + + private def page(addr: UInt) = addr >> matchBits + private def pageMatch(addr: UInt) = { + val p = page(addr) + pageValid & pages.map(_ === p).asUInt + } + private def idxMatch(addr: UInt) = { + val idx = addr(matchBits - 1, log2Up(coreInstBytes)) + idxs.map(_ === idx).asUInt & isValid + } + + val r_btb_update = Pipe(io.btb_update) + val update_target = io.req.bits.addr + + val pageHit = pageMatch(io.req.bits.addr) + val idxHit = idxMatch(io.req.bits.addr) + + val updatePageHit = pageMatch(r_btb_update.bits.pc) + val (updateHit, updateHitAddr) = + if (updatesOutOfOrder) { + val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages)) + (updateHits.orR, OHToUInt(updateHits)) + } else (r_btb_update.bits.prediction.entry < entries.U, r_btb_update.bits.prediction.entry) + + val useUpdatePageHit = updatePageHit.orR + val usePageHit = pageHit.orR + val doIdxPageRepl = !useUpdatePageHit + val nextPageRepl = 
RegInit(0.U(log2Ceil(nPages).W)) + val idxPageRepl = Cat(pageHit(nPages - 2, 0), pageHit(nPages - 1)) | Mux(usePageHit, 0.U, UIntToOH(nextPageRepl)) + val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) + val idxPageUpdate = OHToUInt(idxPageUpdateOH) + val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, 0.U) + + val samePage = page(r_btb_update.bits.pc) === page(update_target) + val doTgtPageRepl = !samePage && !usePageHit + val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages - 2, 0), idxPageUpdateOH(nPages - 1))) + val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, 0.U, tgtPageRepl)) + val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, 0.U) + + when(r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { + val both = doIdxPageRepl && doTgtPageRepl + val next = nextPageRepl + Mux[UInt](both, 2.U, 1.U) + nextPageRepl := Mux(next >= nPages.U, next(0), next) + } + + val repl = new PseudoLRU(entries) + val waddr = Mux(updateHit, updateHitAddr, repl.way) + val r_resp = Pipe(io.resp) + when(r_resp.valid && r_resp.bits.taken || r_btb_update.valid) { + repl.access(Mux(r_btb_update.valid, waddr, r_resp.bits.entry)) + } + + when(r_btb_update.valid) { + val mask = UIntToOH(waddr) + idxs(waddr) := r_btb_update.bits.pc(matchBits - 1, log2Up(coreInstBytes)) + tgts(waddr) := update_target(matchBits - 1, log2Up(coreInstBytes)) + idxPages(waddr) := idxPageUpdate +& 1.U // the +1 corresponds to the <<1 on io.resp.valid + tgtPages(waddr) := tgtPageUpdate + cfiType(waddr) := r_btb_update.bits.cfiType + isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask) + if (fetchWidth > 1) + brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes) + + require(nPages % 2 == 0) + val idxWritesEven = !idxPageUpdate(0) + + def writeBank(i: Int, mod: Int, en: UInt, data: UInt) = + for (i <- i until nPages by mod) + when(en(i)) { pages(i) := data } + + writeBank( + 0, + 2, + Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn), 
+ Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)) + ) + writeBank( + 1, + 2, + Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn), + Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)) + ) + pageValid := pageValid | tgtPageReplEn | idxPageReplEn + } + + io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages)) + io.resp.bits.taken := true.B + io.resp.bits.target := Cat(pagesMasked(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes)) + io.resp.bits.entry := OHToUInt(idxHit) + io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else 0.U) + io.resp.bits.mask := Cat((1.U << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, 0.U)) - 1.U, 1.U) + io.resp.bits.cfiType := Mux1H(idxHit, cfiType) + + // if multiple entries for same PC land in BTB, zap them + when(PopCountAtLeast(idxHit, 2)) { + isValid := isValid & ~idxHit + } + when(io.flush) { + isValid := 0.U + } + + if (btbParams.bhtParams.nonEmpty) { + val bht = new BHT(Annotated.params(this, btbParams.bhtParams.get)) + val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR + val res = bht.get(io.req.bits.addr) + when(io.bht_advance.valid) { + bht.advanceHistory(io.bht_advance.bits.bht.taken) + } + when(io.bht_update.valid) { + when(io.bht_update.bits.branch) { + bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + when(io.bht_update.bits.mispredict) { + bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + } + }.elsewhen(io.bht_update.bits.mispredict) { + bht.resetHistory(io.bht_update.bits.prediction) + } + } + when(!res.taken && isBranch) { io.resp.bits.taken := false.B } + io.resp.bits.bht := res + } + + if (btbParams.nRAS > 0) { + val ras = new RAS(btbParams.nRAS) + val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR + io.ras_head.valid := !ras.isEmpty + io.ras_head.bits := ras.peek + when(!ras.isEmpty && doPeek) { + io.resp.bits.target := 
ras.peek + } + when(io.ras_update.valid) { + when(io.ras_update.bits.cfiType === CFIType.call) { + ras.push(io.ras_update.bits.returnAddr) + }.elsewhen(io.ras_update.bits.cfiType === CFIType.ret) { + ras.pop() + } + } + } +} From 8b5a55ef25b63a7f6bc7656d22fd9936f4b4772c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 14:43:03 +0800 Subject: [PATCH 032/140] [rocketv] migrate PopCountAtLeast --- rocketv/src/PopCountAtLeast.scala | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 rocketv/src/PopCountAtLeast.scala diff --git a/rocketv/src/PopCountAtLeast.scala b/rocketv/src/PopCountAtLeast.scala new file mode 100644 index 000000000..dc253e325 --- /dev/null +++ b/rocketv/src/PopCountAtLeast.scala @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.PopCount + +// TODO: upstream these utilities +object PopCountAtLeast { + private def two(x: UInt): (Bool, Bool) = x.getWidth match { + case 1 => (x.asBool, false.B) + case n => + val half = x.getWidth / 2 + val (leftOne, leftTwo) = two(x(half - 1, 0)) + val (rightOne, rightTwo) = two(x(x.getWidth - 1, half)) + (leftOne || rightOne, leftTwo || rightTwo || (leftOne && rightOne)) + } + def apply(x: UInt, n: Int): Bool = n match { + case 0 => true.B + case 1 => x.orR + case 2 => two(x)._2 + case 3 => PopCount(x) >= n.U + } +} From 324caa50d7958339a4c38f0683a4d4b04886602a Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 14:50:23 +0800 Subject: [PATCH 033/140] [rocketv] copy ReplacementPolicy into rocketv project --- rocketv/src/Replacement.scala | 543 ++++++++++++++++++++++++++++++++++ 1 file changed, 543 insertions(+) create mode 100644 rocketv/src/Replacement.scala diff --git a/rocketv/src/Replacement.scala 
b/rocketv/src/Replacement.scala new file mode 100644 index 000000000..f3a48aa7e --- /dev/null +++ b/rocketv/src/Replacement.scala @@ -0,0 +1,543 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +package freechips.rocketchip.util + +import chisel3._ +import chisel3.util._ +import chisel3.util.random.LFSR +import freechips.rocketchip.util.property.cover + +abstract class ReplacementPolicy { + def nBits: Int + def perSet: Boolean + def way: UInt + def miss: Unit + def hit: Unit + def access(touch_way: UInt): Unit + def access(touch_ways: Seq[Valid[UInt]]): Unit + def state_read: UInt + def get_next_state(state: UInt, touch_way: UInt): UInt + def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = { + touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev)) + } + def get_replace_way(state: UInt): UInt +} + +object ReplacementPolicy { + def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match { + case "random" => new RandomReplacement(n_ways) + case "lru" => new TrueLRU(n_ways) + case "plru" => new PseudoLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } +} + +class RandomReplacement(n_ways: Int) extends ReplacementPolicy { + private val replace = Wire(Bool()) + replace := false.B + def nBits = 16 + def perSet = false + private val lfsr = LFSR(nBits, replace) + def state_read = WireDefault(lfsr) + + def way = Random(n_ways, lfsr) + def miss = replace := true.B + def hit = {} + def access(touch_way: UInt) = {} + def access(touch_ways: Seq[Valid[UInt]]) = {} + def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare + def get_replace_way(state: UInt) = way +} + +abstract class SeqReplacementPolicy { + def access(set: UInt): Unit + def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit + def way: UInt +} + +abstract class SetAssocReplacementPolicy { + def access(set: 
UInt, touch_way: UInt): Unit + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit + def way(set: UInt): UInt +} + +class SeqRandom(n_ways: Int) extends SeqReplacementPolicy { + val logic = new RandomReplacement(n_ways) + def access(set: UInt) = { } + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + when (valid && !hit) { logic.miss } + } + def way = logic.way +} + +class TrueLRU(n_ways: Int) extends ReplacementPolicy { + // True LRU replacement policy, using a triangular matrix to track which sets are more recently used than others. + // The matrix is packed into a single UInt (or Bits). Example 4-way (6-bits): + // [5] - 3 more recent than 2 + // [4] - 3 more recent than 1 + // [3] - 2 more recent than 1 + // [2] - 3 more recent than 0 + // [1] - 2 more recent than 0 + // [0] - 1 more recent than 0 + def nBits = (n_ways * (n_ways-1)) / 2 + def perSet = true + private val state_reg = RegInit(0.U(nBits.W)) + def state_read = WireDefault(state_reg) + + private def extractMRUVec(state: UInt): Seq[UInt] = { + // Extract per-way information about which higher-indexed ways are more recently used + val moreRecentVec = Wire(Vec(n_ways-1, UInt(n_ways.W))) + var lsb = 0 + for (i <- 0 until n_ways-1) { + moreRecentVec(i) := Cat(state(lsb+n_ways-i-2,lsb), 0.U((i+1).W)) + lsb = lsb + (n_ways - i - 1) + } + moreRecentVec + } + + def get_next_state(state: UInt, touch_way: UInt): UInt = { + val nextState = Wire(Vec(n_ways-1, UInt(n_ways.W))) + val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix + val wayDec = UIntToOH(touch_way, n_ways) + + // Compute next value of triangular matrix + // set the touched way as more recent than every other way + nextState.zipWithIndex.map { case (e, i) => + e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec) + } + + nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1 + } + + def 
access(touch_way: UInt): Unit = { + state_reg := get_next_state(state_reg, touch_way) + } + def access(touch_ways: Seq[Valid[UInt]]): Unit = { + when (touch_ways.map(_.valid).orR) { + state_reg := get_next_state(state_reg, touch_ways) + } + for (i <- 1 until touch_ways.size) { + cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") + } + } + + def get_replace_way(state: UInt): UInt = { + val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix + // For each way, determine if all other ways are more recent + val mruWayDec = (0 until n_ways).map { i => + val upperMoreRecent = (if (i == n_ways-1) true.B else moreRecentVec(i).apply(n_ways-1,i+1).andR) + val lowerMoreRecent = (if (i == 0) true.B else moreRecentVec.map(e => !e(i)).reduce(_ && _)) + upperMoreRecent && lowerMoreRecent + } + OHToUInt(mruWayDec) + } + + def way = get_replace_way(state_reg) + def miss = access(way) + def hit = {} + @deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05") + def replace: UInt = way +} + +class PseudoLRU(n_ways: Int) extends ReplacementPolicy { + // Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU + // + // + // - bits storage example for 4-way PLRU binary tree: + // bit[2]: ways 3+2 older than ways 1+0 + // / \ + // bit[1]: way 3 older than way 2 bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 3-way PLRU binary tree: + // bit[1]: way 2 older than ways 1+0 + // \ + // bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 8-way PLRU binary tree: + // bit[6]: ways 7-4 older than ways 3-0 + // / \ + // bit[5]: ways 7+6 > 5+4 bit[2]: ways 3+2 > 1+0 + // / \ / \ + // bit[4]: way 7>6 bit[3]: way 5>4 bit[1]: way 3>2 bit[0]: way 1>0 + + def nBits = n_ways - 1 + def perSet = true + private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W)) + def state_read = 
WireDefault(state_reg) + + def access(touch_way: UInt): Unit = { + state_reg := get_next_state(state_reg, touch_way) + } + def access(touch_ways: Seq[Valid[UInt]]): Unit = { + when (touch_ways.map(_.valid).orR) { + state_reg := get_next_state(state_reg, touch_ways) + } + for (i <- 1 until touch_ways.size) { + cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous") + } + } + + + /** @param state state_reg bits for this sub-tree + * @param touch_way touched way encoded value bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways") + + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val set_left_older = !touch_way(log2Ceil(tree_nways)-1) + val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(set_left_older, + Mux(set_left_older, + left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree + get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right 
sub-tree + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(set_left_older, + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value + !touch_way(0) + } else { // tree_nways <= 1 + // we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires) + 0.U(1.W) + } + } + + def get_next_state(state: UInt, touch_way: UInt): UInt = { + val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo (log2Ceil(n_ways)) + else touch_way.extract(log2Ceil(n_ways)-1,0) + get_next_state(state, touch_way_sized, n_ways) + } + + + /** @param state state_reg bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_replace_way(state: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + + // this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val left_subtree_older = state(tree_nways-2) + val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(left_subtree_older, // 
return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right + get_replace_way(left_subtree_state, left_nways), // recurse left + get_replace_way(right_subtree_state, right_nways))) // recurse right + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right + 0.U(1.W), + get_replace_way(right_subtree_state, right_nways))) // recurse right + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value + state(0) + } else { // tree_nways <= 1 + // we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value + 0.U(1.W) + } + } + + def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways) + + def way = get_replace_way(state_reg) + def miss = access(way) + def hit = {} +} + +class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { + val logic = new PseudoLRU(n_ways) + val state = SyncReadMem(n_sets, UInt(logic.nBits.W)) + val current_state = Wire(UInt(logic.nBits.W)) + val next_state = Wire(UInt(logic.nBits.W)) + val plru_way = logic.get_replace_way(current_state) + + def access(set: UInt) = { + current_state := state.read(set) + } + + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + val update_way = Mux(hit, way, plru_way) + next_state := logic.get_next_state(current_state, update_way) + when (valid) { state.write(set, next_state) } + } + + def way = plru_way +} + + +class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy { + val logic = 
policy.toLowerCase match { + case "plru" => new PseudoLRU(n_ways) + case "lru" => new TrueLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } + val state_vec = + if (logic.nBits == 0) Reg(Vec(n_sets, UInt(logic.nBits.W))) // Work around elaboration error on following line + else RegInit(VecInit(Seq.fill(n_sets)(0.U(logic.nBits.W)))) + + def access(set: UInt, touch_way: UInt) = { + state_vec(set) := logic.get_next_state(state_vec(set), touch_way) + } + + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = { + require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways") + for (set <- 0 until n_sets) { + val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) => + Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)} + when (set_touch_ways.map(_.valid).orR) { + state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways) + } + } + } + + def way(set: UInt) = logic.get_replace_way(state_vec(set)) + +} + +// Synthesizable unit tests +import freechips.rocketchip.unittest._ + +class PLRUTest(n_ways: Int, timeout: Int = 500) extends UnitTest(timeout) { + val plru = new PseudoLRU(n_ways) + + // step + io.finished := RegNext(true.B, false.B) + + val get_replace_ways = (0 until (1 << (n_ways-1))).map(state => + plru.get_replace_way(state = state.U((n_ways-1).W))) + val get_next_states = (0 until (1 << (n_ways-1))).map(state => (0 until n_ways).map(way => + plru.get_next_state (state = state.U((n_ways-1).W), touch_way = way.U(log2Ceil(n_ways).W)))) + + n_ways match { + case 2 => { + assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) + assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) + assert(get_next_states(0)(0) === 1.U(plru.nBits.W), 
s"get_next_state state=0 way=0: expected=1 actual=%d", get_next_states(0)(0)) + assert(get_next_states(0)(1) === 0.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=0 actual=%d", get_next_states(0)(1)) + assert(get_next_states(1)(0) === 1.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=1 actual=%d", get_next_states(1)(0)) + assert(get_next_states(1)(1) === 0.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=0 actual=%d", get_next_states(1)(1)) + } + case 3 => { + assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) + assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) + assert(get_replace_ways(2) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=2 actual=%d", get_replace_ways(2)) + assert(get_replace_ways(3) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=2 actual=%d", get_replace_ways(3)) + assert(get_next_states(0)(0) === 3.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=3 actual=%d", get_next_states(0)(0)) + assert(get_next_states(0)(1) === 2.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=2 actual=%d", get_next_states(0)(1)) + assert(get_next_states(0)(2) === 0.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=0 actual=%d", get_next_states(0)(2)) + assert(get_next_states(1)(0) === 3.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=3 actual=%d", get_next_states(1)(0)) + assert(get_next_states(1)(1) === 2.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=2 actual=%d", get_next_states(1)(1)) + assert(get_next_states(1)(2) === 1.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=1 actual=%d", get_next_states(1)(2)) + assert(get_next_states(2)(0) === 3.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=3 actual=%d", get_next_states(2)(0)) + assert(get_next_states(2)(1) === 2.U(plru.nBits.W), 
s"get_next_state state=2 way=1: expected=2 actual=%d", get_next_states(2)(1)) + assert(get_next_states(2)(2) === 0.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=0 actual=%d", get_next_states(2)(2)) + assert(get_next_states(3)(0) === 3.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=3 actual=%d", get_next_states(3)(0)) + assert(get_next_states(3)(1) === 2.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=2 actual=%d", get_next_states(3)(1)) + assert(get_next_states(3)(2) === 1.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=1 actual=%d", get_next_states(3)(2)) + } + case 4 => { + assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) + assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) + assert(get_replace_ways(2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=0 actual=%d", get_replace_ways(2)) + assert(get_replace_ways(3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=1 actual=%d", get_replace_ways(3)) + assert(get_replace_ways(4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=4: expected=2 actual=%d", get_replace_ways(4)) + assert(get_replace_ways(5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=5: expected=2 actual=%d", get_replace_ways(5)) + assert(get_replace_ways(6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=6: expected=3 actual=%d", get_replace_ways(6)) + assert(get_replace_ways(7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=7: expected=3 actual=%d", get_replace_ways(7)) + assert(get_next_states(0)(0) === 5.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=5 actual=%d", get_next_states(0)(0)) + assert(get_next_states(0)(1) === 4.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=4 actual=%d", get_next_states(0)(1)) + assert(get_next_states(0)(2) === 2.U(plru.nBits.W), s"get_next_state state=0 
way=2: expected=2 actual=%d", get_next_states(0)(2)) + assert(get_next_states(0)(3) === 0.U(plru.nBits.W), s"get_next_state state=0 way=3: expected=0 actual=%d", get_next_states(0)(3)) + assert(get_next_states(1)(0) === 5.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=5 actual=%d", get_next_states(1)(0)) + assert(get_next_states(1)(1) === 4.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=4 actual=%d", get_next_states(1)(1)) + assert(get_next_states(1)(2) === 3.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=3 actual=%d", get_next_states(1)(2)) + assert(get_next_states(1)(3) === 1.U(plru.nBits.W), s"get_next_state state=1 way=3: expected=1 actual=%d", get_next_states(1)(3)) + assert(get_next_states(2)(0) === 7.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=7 actual=%d", get_next_states(2)(0)) + assert(get_next_states(2)(1) === 6.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=6 actual=%d", get_next_states(2)(1)) + assert(get_next_states(2)(2) === 2.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=2 actual=%d", get_next_states(2)(2)) + assert(get_next_states(2)(3) === 0.U(plru.nBits.W), s"get_next_state state=2 way=3: expected=0 actual=%d", get_next_states(2)(3)) + assert(get_next_states(3)(0) === 7.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=7 actual=%d", get_next_states(3)(0)) + assert(get_next_states(3)(1) === 6.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=6 actual=%d", get_next_states(3)(1)) + assert(get_next_states(3)(2) === 3.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=3 actual=%d", get_next_states(3)(2)) + assert(get_next_states(3)(3) === 1.U(plru.nBits.W), s"get_next_state state=3 way=3: expected=1 actual=%d", get_next_states(3)(3)) + assert(get_next_states(4)(0) === 5.U(plru.nBits.W), s"get_next_state state=4 way=0: expected=5 actual=%d", get_next_states(4)(0)) + assert(get_next_states(4)(1) === 4.U(plru.nBits.W), s"get_next_state state=4 way=1: expected=4 
actual=%d", get_next_states(4)(1)) + assert(get_next_states(4)(2) === 2.U(plru.nBits.W), s"get_next_state state=4 way=2: expected=2 actual=%d", get_next_states(4)(2)) + assert(get_next_states(4)(3) === 0.U(plru.nBits.W), s"get_next_state state=4 way=3: expected=0 actual=%d", get_next_states(4)(3)) + assert(get_next_states(5)(0) === 5.U(plru.nBits.W), s"get_next_state state=5 way=0: expected=5 actual=%d", get_next_states(5)(0)) + assert(get_next_states(5)(1) === 4.U(plru.nBits.W), s"get_next_state state=5 way=1: expected=4 actual=%d", get_next_states(5)(1)) + assert(get_next_states(5)(2) === 3.U(plru.nBits.W), s"get_next_state state=5 way=2: expected=3 actual=%d", get_next_states(5)(2)) + assert(get_next_states(5)(3) === 1.U(plru.nBits.W), s"get_next_state state=5 way=3: expected=1 actual=%d", get_next_states(5)(3)) + assert(get_next_states(6)(0) === 7.U(plru.nBits.W), s"get_next_state state=6 way=0: expected=7 actual=%d", get_next_states(6)(0)) + assert(get_next_states(6)(1) === 6.U(plru.nBits.W), s"get_next_state state=6 way=1: expected=6 actual=%d", get_next_states(6)(1)) + assert(get_next_states(6)(2) === 2.U(plru.nBits.W), s"get_next_state state=6 way=2: expected=2 actual=%d", get_next_states(6)(2)) + assert(get_next_states(6)(3) === 0.U(plru.nBits.W), s"get_next_state state=6 way=3: expected=0 actual=%d", get_next_states(6)(3)) + assert(get_next_states(7)(0) === 7.U(plru.nBits.W), s"get_next_state state=7 way=0: expected=7 actual=%d", get_next_states(7)(0)) + assert(get_next_states(7)(1) === 6.U(plru.nBits.W), s"get_next_state state=7 way=5: expected=6 actual=%d", get_next_states(7)(1)) + assert(get_next_states(7)(2) === 3.U(plru.nBits.W), s"get_next_state state=7 way=2: expected=3 actual=%d", get_next_states(7)(2)) + assert(get_next_states(7)(3) === 1.U(plru.nBits.W), s"get_next_state state=7 way=3: expected=1 actual=%d", get_next_states(7)(3)) + } + case 5 => { + assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 
actual=%d", get_replace_ways( 0)) + assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1)) + assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2)) + assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3)) + assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4)) + assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5)) + assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6)) + assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7)) + assert(get_replace_ways( 8) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=4 actual=%d", get_replace_ways( 8)) + assert(get_replace_ways( 9) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=4 actual=%d", get_replace_ways( 9)) + assert(get_replace_ways(10) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=4 actual=%d", get_replace_ways(10)) + assert(get_replace_ways(11) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=4 actual=%d", get_replace_ways(11)) + assert(get_replace_ways(12) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=4 actual=%d", get_replace_ways(12)) + assert(get_replace_ways(13) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=4 actual=%d", get_replace_ways(13)) + assert(get_replace_ways(14) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=4 actual=%d", get_replace_ways(14)) + assert(get_replace_ways(15) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=4 actual=%d", 
get_replace_ways(15)) + assert(get_next_states( 0)(0) === 13.U(plru.nBits.W), s"get_next_state state=00 way=0: expected=13 actual=%d", get_next_states( 0)(0)) + assert(get_next_states( 0)(1) === 12.U(plru.nBits.W), s"get_next_state state=00 way=1: expected=12 actual=%d", get_next_states( 0)(1)) + assert(get_next_states( 0)(2) === 10.U(plru.nBits.W), s"get_next_state state=00 way=2: expected=10 actual=%d", get_next_states( 0)(2)) + assert(get_next_states( 0)(3) === 8.U(plru.nBits.W), s"get_next_state state=00 way=3: expected=08 actual=%d", get_next_states( 0)(3)) + assert(get_next_states( 0)(4) === 0.U(plru.nBits.W), s"get_next_state state=00 way=4: expected=00 actual=%d", get_next_states( 0)(4)) + assert(get_next_states( 1)(0) === 13.U(plru.nBits.W), s"get_next_state state=01 way=0: expected=13 actual=%d", get_next_states( 1)(0)) + assert(get_next_states( 1)(1) === 12.U(plru.nBits.W), s"get_next_state state=01 way=1: expected=12 actual=%d", get_next_states( 1)(1)) + assert(get_next_states( 1)(2) === 11.U(plru.nBits.W), s"get_next_state state=01 way=2: expected=11 actual=%d", get_next_states( 1)(2)) + assert(get_next_states( 1)(3) === 9.U(plru.nBits.W), s"get_next_state state=01 way=3: expected=09 actual=%d", get_next_states( 1)(3)) + assert(get_next_states( 1)(4) === 1.U(plru.nBits.W), s"get_next_state state=01 way=4: expected=01 actual=%d", get_next_states( 1)(4)) + assert(get_next_states( 2)(0) === 15.U(plru.nBits.W), s"get_next_state state=02 way=0: expected=15 actual=%d", get_next_states( 2)(0)) + assert(get_next_states( 2)(1) === 14.U(plru.nBits.W), s"get_next_state state=02 way=1: expected=14 actual=%d", get_next_states( 2)(1)) + assert(get_next_states( 2)(2) === 10.U(plru.nBits.W), s"get_next_state state=02 way=2: expected=10 actual=%d", get_next_states( 2)(2)) + assert(get_next_states( 2)(3) === 8.U(plru.nBits.W), s"get_next_state state=02 way=3: expected=08 actual=%d", get_next_states( 2)(3)) + assert(get_next_states( 2)(4) === 2.U(plru.nBits.W), 
s"get_next_state state=02 way=4: expected=02 actual=%d", get_next_states( 2)(4)) + assert(get_next_states( 3)(0) === 15.U(plru.nBits.W), s"get_next_state state=03 way=0: expected=15 actual=%d", get_next_states( 3)(0)) + assert(get_next_states( 3)(1) === 14.U(plru.nBits.W), s"get_next_state state=03 way=1: expected=14 actual=%d", get_next_states( 3)(1)) + assert(get_next_states( 3)(2) === 11.U(plru.nBits.W), s"get_next_state state=03 way=2: expected=11 actual=%d", get_next_states( 3)(2)) + assert(get_next_states( 3)(3) === 9.U(plru.nBits.W), s"get_next_state state=03 way=3: expected=09 actual=%d", get_next_states( 3)(3)) + assert(get_next_states( 3)(4) === 3.U(plru.nBits.W), s"get_next_state state=03 way=4: expected=03 actual=%d", get_next_states( 3)(4)) + assert(get_next_states( 4)(0) === 13.U(plru.nBits.W), s"get_next_state state=04 way=0: expected=13 actual=%d", get_next_states( 4)(0)) + assert(get_next_states( 4)(1) === 12.U(plru.nBits.W), s"get_next_state state=04 way=1: expected=12 actual=%d", get_next_states( 4)(1)) + assert(get_next_states( 4)(2) === 10.U(plru.nBits.W), s"get_next_state state=04 way=2: expected=10 actual=%d", get_next_states( 4)(2)) + assert(get_next_states( 4)(3) === 8.U(plru.nBits.W), s"get_next_state state=04 way=3: expected=08 actual=%d", get_next_states( 4)(3)) + assert(get_next_states( 4)(4) === 4.U(plru.nBits.W), s"get_next_state state=04 way=4: expected=04 actual=%d", get_next_states( 4)(4)) + assert(get_next_states( 5)(0) === 13.U(plru.nBits.W), s"get_next_state state=05 way=0: expected=13 actual=%d", get_next_states( 5)(0)) + assert(get_next_states( 5)(1) === 12.U(plru.nBits.W), s"get_next_state state=05 way=1: expected=12 actual=%d", get_next_states( 5)(1)) + assert(get_next_states( 5)(2) === 11.U(plru.nBits.W), s"get_next_state state=05 way=2: expected=11 actual=%d", get_next_states( 5)(2)) + assert(get_next_states( 5)(3) === 9.U(plru.nBits.W), s"get_next_state state=05 way=3: expected=09 actual=%d", get_next_states( 5)(3)) + 
assert(get_next_states( 5)(4) === 5.U(plru.nBits.W), s"get_next_state state=05 way=4: expected=05 actual=%d", get_next_states( 5)(4)) + assert(get_next_states( 6)(0) === 15.U(plru.nBits.W), s"get_next_state state=06 way=0: expected=15 actual=%d", get_next_states( 6)(0)) + assert(get_next_states( 6)(1) === 14.U(plru.nBits.W), s"get_next_state state=06 way=1: expected=14 actual=%d", get_next_states( 6)(1)) + assert(get_next_states( 6)(2) === 10.U(plru.nBits.W), s"get_next_state state=06 way=2: expected=10 actual=%d", get_next_states( 6)(2)) + assert(get_next_states( 6)(3) === 8.U(plru.nBits.W), s"get_next_state state=06 way=3: expected=08 actual=%d", get_next_states( 6)(3)) + assert(get_next_states( 6)(4) === 6.U(plru.nBits.W), s"get_next_state state=06 way=4: expected=06 actual=%d", get_next_states( 6)(4)) + assert(get_next_states( 7)(0) === 15.U(plru.nBits.W), s"get_next_state state=07 way=0: expected=15 actual=%d", get_next_states( 7)(0)) + assert(get_next_states( 7)(1) === 14.U(plru.nBits.W), s"get_next_state state=07 way=5: expected=14 actual=%d", get_next_states( 7)(1)) + assert(get_next_states( 7)(2) === 11.U(plru.nBits.W), s"get_next_state state=07 way=2: expected=11 actual=%d", get_next_states( 7)(2)) + assert(get_next_states( 7)(3) === 9.U(plru.nBits.W), s"get_next_state state=07 way=3: expected=09 actual=%d", get_next_states( 7)(3)) + assert(get_next_states( 7)(4) === 7.U(plru.nBits.W), s"get_next_state state=07 way=4: expected=07 actual=%d", get_next_states( 7)(4)) + assert(get_next_states( 8)(0) === 13.U(plru.nBits.W), s"get_next_state state=08 way=0: expected=13 actual=%d", get_next_states( 8)(0)) + assert(get_next_states( 8)(1) === 12.U(plru.nBits.W), s"get_next_state state=08 way=1: expected=12 actual=%d", get_next_states( 8)(1)) + assert(get_next_states( 8)(2) === 10.U(plru.nBits.W), s"get_next_state state=08 way=2: expected=10 actual=%d", get_next_states( 8)(2)) + assert(get_next_states( 8)(3) === 8.U(plru.nBits.W), s"get_next_state state=08 way=3: 
expected=08 actual=%d", get_next_states( 8)(3)) + assert(get_next_states( 8)(4) === 0.U(plru.nBits.W), s"get_next_state state=08 way=4: expected=00 actual=%d", get_next_states( 8)(4)) + assert(get_next_states( 9)(0) === 13.U(plru.nBits.W), s"get_next_state state=09 way=0: expected=13 actual=%d", get_next_states( 9)(0)) + assert(get_next_states( 9)(1) === 12.U(plru.nBits.W), s"get_next_state state=09 way=1: expected=12 actual=%d", get_next_states( 9)(1)) + assert(get_next_states( 9)(2) === 11.U(plru.nBits.W), s"get_next_state state=09 way=2: expected=11 actual=%d", get_next_states( 9)(2)) + assert(get_next_states( 9)(3) === 9.U(plru.nBits.W), s"get_next_state state=09 way=3: expected=09 actual=%d", get_next_states( 9)(3)) + assert(get_next_states( 9)(4) === 1.U(plru.nBits.W), s"get_next_state state=09 way=4: expected=01 actual=%d", get_next_states( 9)(4)) + assert(get_next_states(10)(0) === 15.U(plru.nBits.W), s"get_next_state state=10 way=0: expected=15 actual=%d", get_next_states(10)(0)) + assert(get_next_states(10)(1) === 14.U(plru.nBits.W), s"get_next_state state=10 way=1: expected=14 actual=%d", get_next_states(10)(1)) + assert(get_next_states(10)(2) === 10.U(plru.nBits.W), s"get_next_state state=10 way=2: expected=10 actual=%d", get_next_states(10)(2)) + assert(get_next_states(10)(3) === 8.U(plru.nBits.W), s"get_next_state state=10 way=3: expected=08 actual=%d", get_next_states(10)(3)) + assert(get_next_states(10)(4) === 2.U(plru.nBits.W), s"get_next_state state=10 way=4: expected=02 actual=%d", get_next_states(10)(4)) + assert(get_next_states(11)(0) === 15.U(plru.nBits.W), s"get_next_state state=11 way=0: expected=15 actual=%d", get_next_states(11)(0)) + assert(get_next_states(11)(1) === 14.U(plru.nBits.W), s"get_next_state state=11 way=1: expected=14 actual=%d", get_next_states(11)(1)) + assert(get_next_states(11)(2) === 11.U(plru.nBits.W), s"get_next_state state=11 way=2: expected=11 actual=%d", get_next_states(11)(2)) + assert(get_next_states(11)(3) === 
9.U(plru.nBits.W), s"get_next_state state=11 way=3: expected=09 actual=%d", get_next_states(11)(3)) + assert(get_next_states(11)(4) === 3.U(plru.nBits.W), s"get_next_state state=11 way=4: expected=03 actual=%d", get_next_states(11)(4)) + assert(get_next_states(12)(0) === 13.U(plru.nBits.W), s"get_next_state state=12 way=0: expected=13 actual=%d", get_next_states(12)(0)) + assert(get_next_states(12)(1) === 12.U(plru.nBits.W), s"get_next_state state=12 way=1: expected=12 actual=%d", get_next_states(12)(1)) + assert(get_next_states(12)(2) === 10.U(plru.nBits.W), s"get_next_state state=12 way=2: expected=10 actual=%d", get_next_states(12)(2)) + assert(get_next_states(12)(3) === 8.U(plru.nBits.W), s"get_next_state state=12 way=3: expected=08 actual=%d", get_next_states(12)(3)) + assert(get_next_states(12)(4) === 4.U(plru.nBits.W), s"get_next_state state=12 way=4: expected=04 actual=%d", get_next_states(12)(4)) + assert(get_next_states(13)(0) === 13.U(plru.nBits.W), s"get_next_state state=13 way=0: expected=13 actual=%d", get_next_states(13)(0)) + assert(get_next_states(13)(1) === 12.U(plru.nBits.W), s"get_next_state state=13 way=1: expected=12 actual=%d", get_next_states(13)(1)) + assert(get_next_states(13)(2) === 11.U(plru.nBits.W), s"get_next_state state=13 way=2: expected=11 actual=%d", get_next_states(13)(2)) + assert(get_next_states(13)(3) === 9.U(plru.nBits.W), s"get_next_state state=13 way=3: expected=09 actual=%d", get_next_states(13)(3)) + assert(get_next_states(13)(4) === 5.U(plru.nBits.W), s"get_next_state state=13 way=4: expected=05 actual=%d", get_next_states(13)(4)) + assert(get_next_states(14)(0) === 15.U(plru.nBits.W), s"get_next_state state=14 way=0: expected=15 actual=%d", get_next_states(14)(0)) + assert(get_next_states(14)(1) === 14.U(plru.nBits.W), s"get_next_state state=14 way=1: expected=14 actual=%d", get_next_states(14)(1)) + assert(get_next_states(14)(2) === 10.U(plru.nBits.W), s"get_next_state state=14 way=2: expected=10 actual=%d", 
get_next_states(14)(2)) + assert(get_next_states(14)(3) === 8.U(plru.nBits.W), s"get_next_state state=14 way=3: expected=08 actual=%d", get_next_states(14)(3)) + assert(get_next_states(14)(4) === 6.U(plru.nBits.W), s"get_next_state state=14 way=4: expected=06 actual=%d", get_next_states(14)(4)) + assert(get_next_states(15)(0) === 15.U(plru.nBits.W), s"get_next_state state=15 way=0: expected=15 actual=%d", get_next_states(15)(0)) + assert(get_next_states(15)(1) === 14.U(plru.nBits.W), s"get_next_state state=15 way=5: expected=14 actual=%d", get_next_states(15)(1)) + assert(get_next_states(15)(2) === 11.U(plru.nBits.W), s"get_next_state state=15 way=2: expected=11 actual=%d", get_next_states(15)(2)) + assert(get_next_states(15)(3) === 9.U(plru.nBits.W), s"get_next_state state=15 way=3: expected=09 actual=%d", get_next_states(15)(3)) + assert(get_next_states(15)(4) === 7.U(plru.nBits.W), s"get_next_state state=15 way=4: expected=07 actual=%d", get_next_states(15)(4)) + } + case 6 => { + assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0)) + assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1)) + assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2)) + assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3)) + assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4)) + assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5)) + assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6)) + assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way 
state=07: expected=3 actual=%d", get_replace_ways( 7)) + assert(get_replace_ways( 8) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=0 actual=%d", get_replace_ways( 8)) + assert(get_replace_ways( 9) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=1 actual=%d", get_replace_ways( 9)) + assert(get_replace_ways(10) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=0 actual=%d", get_replace_ways(10)) + assert(get_replace_ways(11) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=1 actual=%d", get_replace_ways(11)) + assert(get_replace_ways(12) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=2 actual=%d", get_replace_ways(12)) + assert(get_replace_ways(13) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=2 actual=%d", get_replace_ways(13)) + assert(get_replace_ways(14) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=3 actual=%d", get_replace_ways(14)) + assert(get_replace_ways(15) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=3 actual=%d", get_replace_ways(15)) + assert(get_replace_ways(16) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=16: expected=4 actual=%d", get_replace_ways(16)) + assert(get_replace_ways(17) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=17: expected=4 actual=%d", get_replace_ways(17)) + assert(get_replace_ways(18) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=18: expected=4 actual=%d", get_replace_ways(18)) + assert(get_replace_ways(19) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=19: expected=4 actual=%d", get_replace_ways(19)) + assert(get_replace_ways(20) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=20: expected=4 actual=%d", get_replace_ways(20)) + assert(get_replace_ways(21) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=21: expected=4 actual=%d", get_replace_ways(21)) + assert(get_replace_ways(22) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=22: 
expected=4 actual=%d", get_replace_ways(22)) + assert(get_replace_ways(23) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=23: expected=4 actual=%d", get_replace_ways(23)) + assert(get_replace_ways(24) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=24: expected=5 actual=%d", get_replace_ways(24)) + assert(get_replace_ways(25) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=25: expected=5 actual=%d", get_replace_ways(25)) + assert(get_replace_ways(26) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=26: expected=5 actual=%d", get_replace_ways(26)) + assert(get_replace_ways(27) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=27: expected=5 actual=%d", get_replace_ways(27)) + assert(get_replace_ways(28) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=28: expected=5 actual=%d", get_replace_ways(28)) + assert(get_replace_ways(29) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=29: expected=5 actual=%d", get_replace_ways(29)) + assert(get_replace_ways(30) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=30: expected=5 actual=%d", get_replace_ways(30)) + assert(get_replace_ways(31) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=31: expected=5 actual=%d", get_replace_ways(31)) + } + case _ => throw new IllegalArgumentException(s"no test pattern found for n_ways=$n_ways") + } +} From c91252515afda60ef144fb5700600896050692b3 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 17:44:21 +0800 Subject: [PATCH 034/140] [rocketv] migrate Replacement --- rocketv/src/Replacement.scala | 311 +++++++--------------------------- 1 file changed, 57 insertions(+), 254 deletions(-) diff --git a/rocketv/src/Replacement.scala b/rocketv/src/Replacement.scala index f3a48aa7e..fabb2f331 100644 --- a/rocketv/src/Replacement.scala +++ b/rocketv/src/Replacement.scala @@ -1,12 +1,13 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
+// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu -package freechips.rocketchip.util +package org.chipsalliance.rocketv import chisel3._ import chisel3.util._ import chisel3.util.random.LFSR -import freechips.rocketchip.util.property.cover abstract class ReplacementPolicy { def nBits: Int @@ -24,6 +25,24 @@ abstract class ReplacementPolicy { def get_replace_way(state: UInt): UInt } +object Random +{ + def apply(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) random(log2Ceil(mod)-1,0) + else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) + } + def apply(mod: Int): UInt = apply(mod, randomizer) + def oneHot(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0)) + else VecInit(PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod))).asUInt + } + def oneHot(mod: Int): UInt = oneHot(mod, randomizer) + + private def randomizer = LFSR(16) + private def partition(value: UInt, slices: Int) = + Seq.tabulate(slices)(i => value < (((i + 1) << value.getWidth) / slices).U) +} + object ReplacementPolicy { def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match { case "random" => new RandomReplacement(n_ways) @@ -103,7 +122,7 @@ class TrueLRU(n_ways: Int) extends ReplacementPolicy { // Compute next value of triangular matrix // set the touched way as more recent than every other way - nextState.zipWithIndex.map { case (e, i) => + nextState.zipWithIndex.foreach { case (e, i) => e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec) } @@ -114,12 +133,12 @@ class TrueLRU(n_ways: Int) extends ReplacementPolicy { state_reg := get_next_state(state_reg, touch_way) } def access(touch_ways: Seq[Valid[UInt]]): Unit = { - when (touch_ways.map(_.valid).orR) { + when (VecInit(touch_ways.map(_.valid)).asUInt.orR) { 
state_reg := get_next_state(state_reg, touch_ways) } - for (i <- 1 until touch_ways.size) { - cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") - } + // for (i <- 1 until touch_ways.size) { + // cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") + // } } def get_replace_way(state: UInt): UInt = { @@ -172,12 +191,12 @@ class PseudoLRU(n_ways: Int) extends ReplacementPolicy { state_reg := get_next_state(state_reg, touch_way) } def access(touch_ways: Seq[Valid[UInt]]): Unit = { - when (touch_ways.map(_.valid).orR) { + when (VecInit(touch_ways.map(_.valid)).asUInt.orR) { state_reg := get_next_state(state_reg, touch_ways) } - for (i <- 1 until touch_ways.size) { - cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous") - } + // for (i <- 1 until touch_ways.size) { + // cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous") + // } } @@ -194,24 +213,24 @@ class PseudoLRU(n_ways: Int) extends ReplacementPolicy { val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree val set_left_older = !touch_way(log2Ceil(tree_nways)-1) - val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val left_subtree_state = if(tree_nways - 1 == right_nways) 0.U else state(tree_nways-3, right_nways-1) val right_subtree_state = state(right_nways-2, 0) if (left_nways > 1) { // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees Cat(set_left_older, - Mux(set_left_older, - left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree - get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer - Mux(set_left_older, - 
get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer - right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + Mux(set_left_older, + left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree + get_next_state(left_subtree_state, touch_way(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree } else { // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree Cat(set_left_older, - Mux(set_left_older, - get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer - right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree } } else if (tree_nways == 2) { // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value @@ -223,8 +242,13 @@ class PseudoLRU(n_ways: Int) extends ReplacementPolicy { } def get_next_state(state: UInt, touch_way: UInt): UInt = { - val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo (log2Ceil(n_ways)) - else touch_way.extract(log2Ceil(n_ways)-1,0) + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } + + val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) padTo(touch_way, log2Ceil(n_ways)) else 
touch_way(log2Ceil(n_ways)-1,0) get_next_state(state, touch_way_sized, n_ways) } @@ -241,21 +265,21 @@ class PseudoLRU(n_ways: Int) extends ReplacementPolicy { val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree val left_subtree_older = state(tree_nways-2) - val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val left_subtree_state = if(tree_nways - 1 == right_nways) 0.U else state(tree_nways-3, right_nways-1) val right_subtree_state = state(right_nways-2, 0) if (left_nways > 1) { // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value - Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right - get_replace_way(left_subtree_state, left_nways), // recurse left - get_replace_way(right_subtree_state, right_nways))) // recurse right + Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right + get_replace_way(left_subtree_state, left_nways), // recurse left + get_replace_way(right_subtree_state, right_nways))) // recurse right } else { // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value - Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right - 0.U(1.W), - get_replace_way(right_subtree_state, right_nways))) // recurse right + Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right + 0.U(1.W), + get_replace_way(right_subtree_state, right_nways))) // recurse right } } else if (tree_nways == 2) { // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of 
the way-to-replace encoded value @@ -293,7 +317,6 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { def way = plru_way } - class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy { val logic = policy.toLowerCase match { case "plru" => new PseudoLRU(n_ways) @@ -313,231 +336,11 @@ class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocRepl for (set <- 0 until n_sets) { val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) => Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)} - when (set_touch_ways.map(_.valid).orR) { + when (VecInit(set_touch_ways.map(_.valid)).asUInt.orR) { state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways) } } } def way(set: UInt) = logic.get_replace_way(state_vec(set)) - -} - -// Synthesizable unit tests -import freechips.rocketchip.unittest._ - -class PLRUTest(n_ways: Int, timeout: Int = 500) extends UnitTest(timeout) { - val plru = new PseudoLRU(n_ways) - - // step - io.finished := RegNext(true.B, false.B) - - val get_replace_ways = (0 until (1 << (n_ways-1))).map(state => - plru.get_replace_way(state = state.U((n_ways-1).W))) - val get_next_states = (0 until (1 << (n_ways-1))).map(state => (0 until n_ways).map(way => - plru.get_next_state (state = state.U((n_ways-1).W), touch_way = way.U(log2Ceil(n_ways).W)))) - - n_ways match { - case 2 => { - assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) - assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) - assert(get_next_states(0)(0) === 1.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=1 actual=%d", get_next_states(0)(0)) - assert(get_next_states(0)(1) === 0.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=0 actual=%d", get_next_states(0)(1)) - assert(get_next_states(1)(0) === 
1.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=1 actual=%d", get_next_states(1)(0)) - assert(get_next_states(1)(1) === 0.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=0 actual=%d", get_next_states(1)(1)) - } - case 3 => { - assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) - assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) - assert(get_replace_ways(2) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=2 actual=%d", get_replace_ways(2)) - assert(get_replace_ways(3) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=2 actual=%d", get_replace_ways(3)) - assert(get_next_states(0)(0) === 3.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=3 actual=%d", get_next_states(0)(0)) - assert(get_next_states(0)(1) === 2.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=2 actual=%d", get_next_states(0)(1)) - assert(get_next_states(0)(2) === 0.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=0 actual=%d", get_next_states(0)(2)) - assert(get_next_states(1)(0) === 3.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=3 actual=%d", get_next_states(1)(0)) - assert(get_next_states(1)(1) === 2.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=2 actual=%d", get_next_states(1)(1)) - assert(get_next_states(1)(2) === 1.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=1 actual=%d", get_next_states(1)(2)) - assert(get_next_states(2)(0) === 3.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=3 actual=%d", get_next_states(2)(0)) - assert(get_next_states(2)(1) === 2.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=2 actual=%d", get_next_states(2)(1)) - assert(get_next_states(2)(2) === 0.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=0 actual=%d", get_next_states(2)(2)) - assert(get_next_states(3)(0) === 
3.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=3 actual=%d", get_next_states(3)(0)) - assert(get_next_states(3)(1) === 2.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=2 actual=%d", get_next_states(3)(1)) - assert(get_next_states(3)(2) === 1.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=1 actual=%d", get_next_states(3)(2)) - } - case 4 => { - assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0)) - assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1)) - assert(get_replace_ways(2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=0 actual=%d", get_replace_ways(2)) - assert(get_replace_ways(3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=1 actual=%d", get_replace_ways(3)) - assert(get_replace_ways(4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=4: expected=2 actual=%d", get_replace_ways(4)) - assert(get_replace_ways(5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=5: expected=2 actual=%d", get_replace_ways(5)) - assert(get_replace_ways(6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=6: expected=3 actual=%d", get_replace_ways(6)) - assert(get_replace_ways(7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=7: expected=3 actual=%d", get_replace_ways(7)) - assert(get_next_states(0)(0) === 5.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=5 actual=%d", get_next_states(0)(0)) - assert(get_next_states(0)(1) === 4.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=4 actual=%d", get_next_states(0)(1)) - assert(get_next_states(0)(2) === 2.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=2 actual=%d", get_next_states(0)(2)) - assert(get_next_states(0)(3) === 0.U(plru.nBits.W), s"get_next_state state=0 way=3: expected=0 actual=%d", get_next_states(0)(3)) - assert(get_next_states(1)(0) === 5.U(plru.nBits.W), 
s"get_next_state state=1 way=0: expected=5 actual=%d", get_next_states(1)(0)) - assert(get_next_states(1)(1) === 4.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=4 actual=%d", get_next_states(1)(1)) - assert(get_next_states(1)(2) === 3.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=3 actual=%d", get_next_states(1)(2)) - assert(get_next_states(1)(3) === 1.U(plru.nBits.W), s"get_next_state state=1 way=3: expected=1 actual=%d", get_next_states(1)(3)) - assert(get_next_states(2)(0) === 7.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=7 actual=%d", get_next_states(2)(0)) - assert(get_next_states(2)(1) === 6.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=6 actual=%d", get_next_states(2)(1)) - assert(get_next_states(2)(2) === 2.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=2 actual=%d", get_next_states(2)(2)) - assert(get_next_states(2)(3) === 0.U(plru.nBits.W), s"get_next_state state=2 way=3: expected=0 actual=%d", get_next_states(2)(3)) - assert(get_next_states(3)(0) === 7.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=7 actual=%d", get_next_states(3)(0)) - assert(get_next_states(3)(1) === 6.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=6 actual=%d", get_next_states(3)(1)) - assert(get_next_states(3)(2) === 3.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=3 actual=%d", get_next_states(3)(2)) - assert(get_next_states(3)(3) === 1.U(plru.nBits.W), s"get_next_state state=3 way=3: expected=1 actual=%d", get_next_states(3)(3)) - assert(get_next_states(4)(0) === 5.U(plru.nBits.W), s"get_next_state state=4 way=0: expected=5 actual=%d", get_next_states(4)(0)) - assert(get_next_states(4)(1) === 4.U(plru.nBits.W), s"get_next_state state=4 way=1: expected=4 actual=%d", get_next_states(4)(1)) - assert(get_next_states(4)(2) === 2.U(plru.nBits.W), s"get_next_state state=4 way=2: expected=2 actual=%d", get_next_states(4)(2)) - assert(get_next_states(4)(3) === 0.U(plru.nBits.W), s"get_next_state 
state=4 way=3: expected=0 actual=%d", get_next_states(4)(3)) - assert(get_next_states(5)(0) === 5.U(plru.nBits.W), s"get_next_state state=5 way=0: expected=5 actual=%d", get_next_states(5)(0)) - assert(get_next_states(5)(1) === 4.U(plru.nBits.W), s"get_next_state state=5 way=1: expected=4 actual=%d", get_next_states(5)(1)) - assert(get_next_states(5)(2) === 3.U(plru.nBits.W), s"get_next_state state=5 way=2: expected=3 actual=%d", get_next_states(5)(2)) - assert(get_next_states(5)(3) === 1.U(plru.nBits.W), s"get_next_state state=5 way=3: expected=1 actual=%d", get_next_states(5)(3)) - assert(get_next_states(6)(0) === 7.U(plru.nBits.W), s"get_next_state state=6 way=0: expected=7 actual=%d", get_next_states(6)(0)) - assert(get_next_states(6)(1) === 6.U(plru.nBits.W), s"get_next_state state=6 way=1: expected=6 actual=%d", get_next_states(6)(1)) - assert(get_next_states(6)(2) === 2.U(plru.nBits.W), s"get_next_state state=6 way=2: expected=2 actual=%d", get_next_states(6)(2)) - assert(get_next_states(6)(3) === 0.U(plru.nBits.W), s"get_next_state state=6 way=3: expected=0 actual=%d", get_next_states(6)(3)) - assert(get_next_states(7)(0) === 7.U(plru.nBits.W), s"get_next_state state=7 way=0: expected=7 actual=%d", get_next_states(7)(0)) - assert(get_next_states(7)(1) === 6.U(plru.nBits.W), s"get_next_state state=7 way=5: expected=6 actual=%d", get_next_states(7)(1)) - assert(get_next_states(7)(2) === 3.U(plru.nBits.W), s"get_next_state state=7 way=2: expected=3 actual=%d", get_next_states(7)(2)) - assert(get_next_states(7)(3) === 1.U(plru.nBits.W), s"get_next_state state=7 way=3: expected=1 actual=%d", get_next_states(7)(3)) - } - case 5 => { - assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0)) - assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1)) - assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), 
s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2)) - assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3)) - assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4)) - assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5)) - assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6)) - assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7)) - assert(get_replace_ways( 8) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=4 actual=%d", get_replace_ways( 8)) - assert(get_replace_ways( 9) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=4 actual=%d", get_replace_ways( 9)) - assert(get_replace_ways(10) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=4 actual=%d", get_replace_ways(10)) - assert(get_replace_ways(11) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=4 actual=%d", get_replace_ways(11)) - assert(get_replace_ways(12) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=4 actual=%d", get_replace_ways(12)) - assert(get_replace_ways(13) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=4 actual=%d", get_replace_ways(13)) - assert(get_replace_ways(14) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=4 actual=%d", get_replace_ways(14)) - assert(get_replace_ways(15) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=4 actual=%d", get_replace_ways(15)) - assert(get_next_states( 0)(0) === 13.U(plru.nBits.W), s"get_next_state state=00 way=0: expected=13 actual=%d", get_next_states( 0)(0)) - assert(get_next_states( 0)(1) === 12.U(plru.nBits.W), s"get_next_state 
state=00 way=1: expected=12 actual=%d", get_next_states( 0)(1)) - assert(get_next_states( 0)(2) === 10.U(plru.nBits.W), s"get_next_state state=00 way=2: expected=10 actual=%d", get_next_states( 0)(2)) - assert(get_next_states( 0)(3) === 8.U(plru.nBits.W), s"get_next_state state=00 way=3: expected=08 actual=%d", get_next_states( 0)(3)) - assert(get_next_states( 0)(4) === 0.U(plru.nBits.W), s"get_next_state state=00 way=4: expected=00 actual=%d", get_next_states( 0)(4)) - assert(get_next_states( 1)(0) === 13.U(plru.nBits.W), s"get_next_state state=01 way=0: expected=13 actual=%d", get_next_states( 1)(0)) - assert(get_next_states( 1)(1) === 12.U(plru.nBits.W), s"get_next_state state=01 way=1: expected=12 actual=%d", get_next_states( 1)(1)) - assert(get_next_states( 1)(2) === 11.U(plru.nBits.W), s"get_next_state state=01 way=2: expected=11 actual=%d", get_next_states( 1)(2)) - assert(get_next_states( 1)(3) === 9.U(plru.nBits.W), s"get_next_state state=01 way=3: expected=09 actual=%d", get_next_states( 1)(3)) - assert(get_next_states( 1)(4) === 1.U(plru.nBits.W), s"get_next_state state=01 way=4: expected=01 actual=%d", get_next_states( 1)(4)) - assert(get_next_states( 2)(0) === 15.U(plru.nBits.W), s"get_next_state state=02 way=0: expected=15 actual=%d", get_next_states( 2)(0)) - assert(get_next_states( 2)(1) === 14.U(plru.nBits.W), s"get_next_state state=02 way=1: expected=14 actual=%d", get_next_states( 2)(1)) - assert(get_next_states( 2)(2) === 10.U(plru.nBits.W), s"get_next_state state=02 way=2: expected=10 actual=%d", get_next_states( 2)(2)) - assert(get_next_states( 2)(3) === 8.U(plru.nBits.W), s"get_next_state state=02 way=3: expected=08 actual=%d", get_next_states( 2)(3)) - assert(get_next_states( 2)(4) === 2.U(plru.nBits.W), s"get_next_state state=02 way=4: expected=02 actual=%d", get_next_states( 2)(4)) - assert(get_next_states( 3)(0) === 15.U(plru.nBits.W), s"get_next_state state=03 way=0: expected=15 actual=%d", get_next_states( 3)(0)) - 
assert(get_next_states( 3)(1) === 14.U(plru.nBits.W), s"get_next_state state=03 way=1: expected=14 actual=%d", get_next_states( 3)(1)) - assert(get_next_states( 3)(2) === 11.U(plru.nBits.W), s"get_next_state state=03 way=2: expected=11 actual=%d", get_next_states( 3)(2)) - assert(get_next_states( 3)(3) === 9.U(plru.nBits.W), s"get_next_state state=03 way=3: expected=09 actual=%d", get_next_states( 3)(3)) - assert(get_next_states( 3)(4) === 3.U(plru.nBits.W), s"get_next_state state=03 way=4: expected=03 actual=%d", get_next_states( 3)(4)) - assert(get_next_states( 4)(0) === 13.U(plru.nBits.W), s"get_next_state state=04 way=0: expected=13 actual=%d", get_next_states( 4)(0)) - assert(get_next_states( 4)(1) === 12.U(plru.nBits.W), s"get_next_state state=04 way=1: expected=12 actual=%d", get_next_states( 4)(1)) - assert(get_next_states( 4)(2) === 10.U(plru.nBits.W), s"get_next_state state=04 way=2: expected=10 actual=%d", get_next_states( 4)(2)) - assert(get_next_states( 4)(3) === 8.U(plru.nBits.W), s"get_next_state state=04 way=3: expected=08 actual=%d", get_next_states( 4)(3)) - assert(get_next_states( 4)(4) === 4.U(plru.nBits.W), s"get_next_state state=04 way=4: expected=04 actual=%d", get_next_states( 4)(4)) - assert(get_next_states( 5)(0) === 13.U(plru.nBits.W), s"get_next_state state=05 way=0: expected=13 actual=%d", get_next_states( 5)(0)) - assert(get_next_states( 5)(1) === 12.U(plru.nBits.W), s"get_next_state state=05 way=1: expected=12 actual=%d", get_next_states( 5)(1)) - assert(get_next_states( 5)(2) === 11.U(plru.nBits.W), s"get_next_state state=05 way=2: expected=11 actual=%d", get_next_states( 5)(2)) - assert(get_next_states( 5)(3) === 9.U(plru.nBits.W), s"get_next_state state=05 way=3: expected=09 actual=%d", get_next_states( 5)(3)) - assert(get_next_states( 5)(4) === 5.U(plru.nBits.W), s"get_next_state state=05 way=4: expected=05 actual=%d", get_next_states( 5)(4)) - assert(get_next_states( 6)(0) === 15.U(plru.nBits.W), s"get_next_state state=06 way=0: 
expected=15 actual=%d", get_next_states( 6)(0)) - assert(get_next_states( 6)(1) === 14.U(plru.nBits.W), s"get_next_state state=06 way=1: expected=14 actual=%d", get_next_states( 6)(1)) - assert(get_next_states( 6)(2) === 10.U(plru.nBits.W), s"get_next_state state=06 way=2: expected=10 actual=%d", get_next_states( 6)(2)) - assert(get_next_states( 6)(3) === 8.U(plru.nBits.W), s"get_next_state state=06 way=3: expected=08 actual=%d", get_next_states( 6)(3)) - assert(get_next_states( 6)(4) === 6.U(plru.nBits.W), s"get_next_state state=06 way=4: expected=06 actual=%d", get_next_states( 6)(4)) - assert(get_next_states( 7)(0) === 15.U(plru.nBits.W), s"get_next_state state=07 way=0: expected=15 actual=%d", get_next_states( 7)(0)) - assert(get_next_states( 7)(1) === 14.U(plru.nBits.W), s"get_next_state state=07 way=5: expected=14 actual=%d", get_next_states( 7)(1)) - assert(get_next_states( 7)(2) === 11.U(plru.nBits.W), s"get_next_state state=07 way=2: expected=11 actual=%d", get_next_states( 7)(2)) - assert(get_next_states( 7)(3) === 9.U(plru.nBits.W), s"get_next_state state=07 way=3: expected=09 actual=%d", get_next_states( 7)(3)) - assert(get_next_states( 7)(4) === 7.U(plru.nBits.W), s"get_next_state state=07 way=4: expected=07 actual=%d", get_next_states( 7)(4)) - assert(get_next_states( 8)(0) === 13.U(plru.nBits.W), s"get_next_state state=08 way=0: expected=13 actual=%d", get_next_states( 8)(0)) - assert(get_next_states( 8)(1) === 12.U(plru.nBits.W), s"get_next_state state=08 way=1: expected=12 actual=%d", get_next_states( 8)(1)) - assert(get_next_states( 8)(2) === 10.U(plru.nBits.W), s"get_next_state state=08 way=2: expected=10 actual=%d", get_next_states( 8)(2)) - assert(get_next_states( 8)(3) === 8.U(plru.nBits.W), s"get_next_state state=08 way=3: expected=08 actual=%d", get_next_states( 8)(3)) - assert(get_next_states( 8)(4) === 0.U(plru.nBits.W), s"get_next_state state=08 way=4: expected=00 actual=%d", get_next_states( 8)(4)) - assert(get_next_states( 9)(0) === 
13.U(plru.nBits.W), s"get_next_state state=09 way=0: expected=13 actual=%d", get_next_states( 9)(0)) - assert(get_next_states( 9)(1) === 12.U(plru.nBits.W), s"get_next_state state=09 way=1: expected=12 actual=%d", get_next_states( 9)(1)) - assert(get_next_states( 9)(2) === 11.U(plru.nBits.W), s"get_next_state state=09 way=2: expected=11 actual=%d", get_next_states( 9)(2)) - assert(get_next_states( 9)(3) === 9.U(plru.nBits.W), s"get_next_state state=09 way=3: expected=09 actual=%d", get_next_states( 9)(3)) - assert(get_next_states( 9)(4) === 1.U(plru.nBits.W), s"get_next_state state=09 way=4: expected=01 actual=%d", get_next_states( 9)(4)) - assert(get_next_states(10)(0) === 15.U(plru.nBits.W), s"get_next_state state=10 way=0: expected=15 actual=%d", get_next_states(10)(0)) - assert(get_next_states(10)(1) === 14.U(plru.nBits.W), s"get_next_state state=10 way=1: expected=14 actual=%d", get_next_states(10)(1)) - assert(get_next_states(10)(2) === 10.U(plru.nBits.W), s"get_next_state state=10 way=2: expected=10 actual=%d", get_next_states(10)(2)) - assert(get_next_states(10)(3) === 8.U(plru.nBits.W), s"get_next_state state=10 way=3: expected=08 actual=%d", get_next_states(10)(3)) - assert(get_next_states(10)(4) === 2.U(plru.nBits.W), s"get_next_state state=10 way=4: expected=02 actual=%d", get_next_states(10)(4)) - assert(get_next_states(11)(0) === 15.U(plru.nBits.W), s"get_next_state state=11 way=0: expected=15 actual=%d", get_next_states(11)(0)) - assert(get_next_states(11)(1) === 14.U(plru.nBits.W), s"get_next_state state=11 way=1: expected=14 actual=%d", get_next_states(11)(1)) - assert(get_next_states(11)(2) === 11.U(plru.nBits.W), s"get_next_state state=11 way=2: expected=11 actual=%d", get_next_states(11)(2)) - assert(get_next_states(11)(3) === 9.U(plru.nBits.W), s"get_next_state state=11 way=3: expected=09 actual=%d", get_next_states(11)(3)) - assert(get_next_states(11)(4) === 3.U(plru.nBits.W), s"get_next_state state=11 way=4: expected=03 actual=%d", 
get_next_states(11)(4)) - assert(get_next_states(12)(0) === 13.U(plru.nBits.W), s"get_next_state state=12 way=0: expected=13 actual=%d", get_next_states(12)(0)) - assert(get_next_states(12)(1) === 12.U(plru.nBits.W), s"get_next_state state=12 way=1: expected=12 actual=%d", get_next_states(12)(1)) - assert(get_next_states(12)(2) === 10.U(plru.nBits.W), s"get_next_state state=12 way=2: expected=10 actual=%d", get_next_states(12)(2)) - assert(get_next_states(12)(3) === 8.U(plru.nBits.W), s"get_next_state state=12 way=3: expected=08 actual=%d", get_next_states(12)(3)) - assert(get_next_states(12)(4) === 4.U(plru.nBits.W), s"get_next_state state=12 way=4: expected=04 actual=%d", get_next_states(12)(4)) - assert(get_next_states(13)(0) === 13.U(plru.nBits.W), s"get_next_state state=13 way=0: expected=13 actual=%d", get_next_states(13)(0)) - assert(get_next_states(13)(1) === 12.U(plru.nBits.W), s"get_next_state state=13 way=1: expected=12 actual=%d", get_next_states(13)(1)) - assert(get_next_states(13)(2) === 11.U(plru.nBits.W), s"get_next_state state=13 way=2: expected=11 actual=%d", get_next_states(13)(2)) - assert(get_next_states(13)(3) === 9.U(plru.nBits.W), s"get_next_state state=13 way=3: expected=09 actual=%d", get_next_states(13)(3)) - assert(get_next_states(13)(4) === 5.U(plru.nBits.W), s"get_next_state state=13 way=4: expected=05 actual=%d", get_next_states(13)(4)) - assert(get_next_states(14)(0) === 15.U(plru.nBits.W), s"get_next_state state=14 way=0: expected=15 actual=%d", get_next_states(14)(0)) - assert(get_next_states(14)(1) === 14.U(plru.nBits.W), s"get_next_state state=14 way=1: expected=14 actual=%d", get_next_states(14)(1)) - assert(get_next_states(14)(2) === 10.U(plru.nBits.W), s"get_next_state state=14 way=2: expected=10 actual=%d", get_next_states(14)(2)) - assert(get_next_states(14)(3) === 8.U(plru.nBits.W), s"get_next_state state=14 way=3: expected=08 actual=%d", get_next_states(14)(3)) - assert(get_next_states(14)(4) === 6.U(plru.nBits.W), 
s"get_next_state state=14 way=4: expected=06 actual=%d", get_next_states(14)(4)) - assert(get_next_states(15)(0) === 15.U(plru.nBits.W), s"get_next_state state=15 way=0: expected=15 actual=%d", get_next_states(15)(0)) - assert(get_next_states(15)(1) === 14.U(plru.nBits.W), s"get_next_state state=15 way=5: expected=14 actual=%d", get_next_states(15)(1)) - assert(get_next_states(15)(2) === 11.U(plru.nBits.W), s"get_next_state state=15 way=2: expected=11 actual=%d", get_next_states(15)(2)) - assert(get_next_states(15)(3) === 9.U(plru.nBits.W), s"get_next_state state=15 way=3: expected=09 actual=%d", get_next_states(15)(3)) - assert(get_next_states(15)(4) === 7.U(plru.nBits.W), s"get_next_state state=15 way=4: expected=07 actual=%d", get_next_states(15)(4)) - } - case 6 => { - assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0)) - assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1)) - assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2)) - assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3)) - assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4)) - assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5)) - assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6)) - assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7)) - assert(get_replace_ways( 8) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=0 actual=%d", get_replace_ways( 8)) - assert(get_replace_ways( 9) 
=== 1.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=1 actual=%d", get_replace_ways( 9)) - assert(get_replace_ways(10) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=0 actual=%d", get_replace_ways(10)) - assert(get_replace_ways(11) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=1 actual=%d", get_replace_ways(11)) - assert(get_replace_ways(12) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=2 actual=%d", get_replace_ways(12)) - assert(get_replace_ways(13) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=2 actual=%d", get_replace_ways(13)) - assert(get_replace_ways(14) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=3 actual=%d", get_replace_ways(14)) - assert(get_replace_ways(15) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=3 actual=%d", get_replace_ways(15)) - assert(get_replace_ways(16) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=16: expected=4 actual=%d", get_replace_ways(16)) - assert(get_replace_ways(17) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=17: expected=4 actual=%d", get_replace_ways(17)) - assert(get_replace_ways(18) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=18: expected=4 actual=%d", get_replace_ways(18)) - assert(get_replace_ways(19) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=19: expected=4 actual=%d", get_replace_ways(19)) - assert(get_replace_ways(20) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=20: expected=4 actual=%d", get_replace_ways(20)) - assert(get_replace_ways(21) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=21: expected=4 actual=%d", get_replace_ways(21)) - assert(get_replace_ways(22) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=22: expected=4 actual=%d", get_replace_ways(22)) - assert(get_replace_ways(23) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=23: expected=4 actual=%d", get_replace_ways(23)) - assert(get_replace_ways(24) === 
5.U(log2Ceil(n_ways).W), s"get_replace_way state=24: expected=5 actual=%d", get_replace_ways(24)) - assert(get_replace_ways(25) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=25: expected=5 actual=%d", get_replace_ways(25)) - assert(get_replace_ways(26) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=26: expected=5 actual=%d", get_replace_ways(26)) - assert(get_replace_ways(27) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=27: expected=5 actual=%d", get_replace_ways(27)) - assert(get_replace_ways(28) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=28: expected=5 actual=%d", get_replace_ways(28)) - assert(get_replace_ways(29) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=29: expected=5 actual=%d", get_replace_ways(29)) - assert(get_replace_ways(30) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=30: expected=5 actual=%d", get_replace_ways(30)) - assert(get_replace_ways(31) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=31: expected=5 actual=%d", get_replace_ways(31)) - } - case _ => throw new IllegalArgumentException(s"no test pattern found for n_ways=$n_ways") - } } From e0d14560d6ec7beba7bafb9854b3078614cf1377 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 14:43:56 +0800 Subject: [PATCH 035/140] [rocketv] migrate BTB --- rocketv/src/BTB.scala | 378 ++++++++++++++++++++------------------- rocketv/src/Bundle.scala | 107 +++++++++-- 2 files changed, 281 insertions(+), 204 deletions(-) diff --git a/rocketv/src/BTB.scala b/rocketv/src/BTB.scala index 9b45b41fa..7a7d85f29 100644 --- a/rocketv/src/BTB.scala +++ b/rocketv/src/BTB.scala @@ -1,191 +1,111 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util._ -import freechips.rocketchip.rocket.BHTParams -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.subsystem.CacheBlockBytes -import freechips.rocketchip.tile.HasCoreParameters -import freechips.rocketchip.util._ -// TODO: Get rid of it. -import freechips.rocketchip.rocket.BTBParams - -trait HasBtbParameters extends HasCoreParameters { - // damn... tile deps rocketcore - val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0)) - val matchBits = btbParams.nMatchBits.max(log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets)) - val entries = btbParams.nEntries - val updatesOutOfOrder = btbParams.updatesOutOfOrder - val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages -} - -abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters { - Annotated.params(this, btbParams) -} - -abstract class BtbBundle(implicit val p: Parameters) extends Bundle with HasBtbParameters - -class RAS(nras: Int) { - def push(addr: UInt): Unit = { - when(count < nras.U) { count := count + 1.U } - val nextPos = Mux((isPow2(nras)).B || pos < (nras - 1).U, pos + 1.U, 0.U) - stack(nextPos) := addr - pos := nextPos - } - def peek: UInt = stack(pos) - def pop(): Unit = when(!isEmpty) { - count := count - 1.U - pos := Mux((isPow2(nras)).B || pos > 0.U, pos - 1.U, (nras - 1).U) - } - def clear(): Unit = count := 0.U - def isEmpty: Bool = count === 0.U - - private val count = RegInit(0.U(log2Up(nras + 1).W)) - private val pos = 
RegInit(0.U(log2Up(nras).W)) - private val stack = Reg(Vec(nras, UInt())) -} - -class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { - val history = UInt(btbParams.bhtParams.map(_.historyLength).getOrElse(1).W) - val value = UInt(btbParams.bhtParams.map(_.counterLength).getOrElse(1).W) - def taken = value(0) - def strongly_taken = value === 1.U -} - -// BHT contains table of 2-bit counters and a global history register. -// The BHT only predicts and updates when there is a BTB hit. -// The global history: -// - updated speculatively in fetch (if there's a BTB hit). -// - on a mispredict, the history register is reset (again, only if BTB hit). -// The counter table: -// - each counter corresponds with the address of the fetch packet ("fetch pc"). -// - updated when a branch resolves (and BTB was a hit for that branch). -// The updating branch must provide its "fetch pc". -class BHT(params: BHTParams)(implicit val p: Parameters) extends HasCoreParameters { - def index(addr: UInt, history: UInt) = { - def hashHistory(hist: UInt) = if (params.historyLength == params.historyBits) hist - else { - val k = math.sqrt(3) / 2 - val i = BigDecimal(k * math.pow(2, params.historyLength)).toBigInt - (i.U * hist)(params.historyLength - 1, params.historyLength - params.historyBits) - } - def hashAddr(addr: UInt) = { - val hi = addr >> log2Ceil(fetchBytes) - hi(log2Ceil(params.nEntries) - 1, 0) ^ (hi >> log2Ceil(params.nEntries))(1, 0) - } - hashAddr(addr) ^ (hashHistory(history) << (log2Up(params.nEntries) - params.historyBits)) - } - def get(addr: UInt): BHTResp = { - val res = Wire(new BHTResp) - res.value := Mux(resetting, 0.U, table(index(addr, history))) - res.history := history - res - } - def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { - wen := true.B - when(!resetting) { - waddr := index(addr, d.history) - wdata := (params.counterLength match { - case 1 => taken - case 2 => Cat(taken ^ d.value(0), d.value === 1.U || d.value(1) && taken) - }) - } - } 
- def resetHistory(d: BHTResp): Unit = { - history := d.history - } - def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { - history := Cat(taken, d.history >> 1) - } - def advanceHistory(taken: Bool): Unit = { - history := Cat(taken, history >> 1) - } - - private val table = Mem(params.nEntries, UInt(params.counterLength.W)) - val history = RegInit(0.U(params.historyLength.W)) - private val reset_waddr = RegInit(0.U((params.nEntries.log2 + 1).W)) - private val resetting = !reset_waddr(params.nEntries.log2) - private val wen = WireInit(resetting) - private val waddr = WireInit(reset_waddr) - private val wdata = WireInit(0.U) - when(resetting) { reset_waddr := reset_waddr + 1.U } - when(wen) { table(waddr) := wdata } +object BHTParameter { + implicit def rwP: upickle.default.ReadWriter[BHTParameter] = upickle.default.macroRW[BHTParameter] } -object CFIType { - def SZ = 2 - def apply() = UInt(SZ.W) - def branch = 0.U - def jump = 1.U - def call = 2.U - def ret = 3.U -} - -// BTB update occurs during branch resolution (and only on a mispredict). -// - "pc" is what future fetch PCs will tag match against. -// - "br_pc" is the PC of the branch instruction. -class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { - val prediction = new BTBResp - val pc = UInt(vaddrBits.W) - val target = UInt(vaddrBits.W) - val taken = Bool() - val isValid = Bool() - val br_pc = UInt(vaddrBits.W) - val cfiType = CFIType() -} +case class BHTParameter(nEntries: Int, counterLength: Int, historyLength: Int, historyBits: Int) -// BHT update occurs during branch resolution on all conditional branches. -// - "pc" is what future fetch PCs will tag match against. 
-class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { - val prediction = new BHTResp - val pc = UInt(vaddrBits.W) - val branch = Bool() - val taken = Bool() - val mispredict = Bool() +object BTBParameter { + implicit def rwP: upickle.default.ReadWriter[BTBParameter] = upickle.default.macroRW[BTBParameter] } -class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { - val cfiType = CFIType() - val returnAddr = UInt(vaddrBits.W) +case class BTBParameter( + useAsyncReset: Boolean, + fetchBytes: Int, + vaddrBits: Int, + entries: Int, + nMatchBits: Int, + nPages: Int, + nRAS: Int, + cacheBlockBytes: Int, + iCacheSet: Int, + useCompressed: Boolean, + updatesOutOfOrder: Boolean, + fetchWidth: Int, + // below is for BHT, notice, the BHT is not a actually module:( + bhtParameter: Option[BHTParameter]) + extends SerializableModuleParameter { + val nEntries: Int = entries } -// - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). -// - "mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch from the BTB). 
-class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { - val cfiType = CFIType() - val taken = Bool() - val mask = Bits(fetchWidth.W) - val bridx = Bits(log2Up(fetchWidth).W) - val target = UInt(vaddrBits.W) - val entry = UInt(log2Up(entries + 1).W) - val bht = new BHTResp -} - -class BTBReq(implicit p: Parameters) extends BtbBundle()(p) { - val addr = UInt(vaddrBits.W) +class BTBInterface(parameter: BTBParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if(parameter.useAsyncReset) AsyncReset() else Bool()) + val req = Flipped(Valid(new BTBReq(parameter.vaddrBits))) + val resp = Valid( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + val btb_update = Flipped( + Valid( + new BTBUpdate( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + ) + val bht_update = Flipped( + Valid( + new BHTUpdate( + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength), + parameter.vaddrBits + ) + ) + ) + val bht_advance = Flipped( + Valid( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtParameter.map(_.historyLength), + parameter.bhtParameter.map(_.counterLength) + ) + ) + ) + val ras_update = Flipped(Valid(new RASUpdate(parameter.vaddrBits))) + val ras_head = Valid(UInt(parameter.vaddrBits.W)) + val flush = Input(Bool()) } -// fully-associative branch target buffer -// Higher-performance processors may cause BTB updates to occur out-of-order, -// which requires an extra CAM port for updates (to ensure no duplicates get -// placed in BTB). 
-class BTB(implicit p: Parameters) extends BtbModule { - val io = IO(new Bundle { - val req = Flipped(Valid(new BTBReq)) - val resp = Valid(new BTBResp) - val btb_update = Flipped(Valid(new BTBUpdate)) - val bht_update = Flipped(Valid(new BHTUpdate)) - val bht_advance = Flipped(Valid(new BTBResp)) - val ras_update = Flipped(Valid(new RASUpdate)) - val ras_head = Valid(UInt(vaddrBits.W)) - val flush = Input(Bool()) - }) +@instantiable +class BTB(val parameter: BTBParameter) + extends FixedIORawModule(new BTBInterface(parameter)) + with SerializableModule[BTBParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // compatibility layer + val entries = parameter.entries + val nMatchBits = parameter.nMatchBits + val matchBits = parameter.nMatchBits.max(log2Ceil(parameter.cacheBlockBytes * parameter.iCacheSet)) + val coreInstBytes = (if (parameter.useCompressed) 16 else 32) / 8 + val nPages = (parameter.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages + val vaddrBits = parameter.vaddrBits + val fetchWidth = parameter.fetchWidth + val updatesOutOfOrder = parameter.updatesOutOfOrder + // original implementation. 
val idxs = Reg(Vec(entries, UInt((matchBits - log2Up(coreInstBytes)).W))) val idxPages = Reg(Vec(entries, UInt(log2Up(nPages).W))) @@ -196,17 +116,17 @@ class BTB(implicit p: Parameters) extends BtbModule { val pagesMasked = (pageValid.asBools.zip(pages)).map { case (v, p) => Mux(v, p, 0.U) } val isValid = RegInit(0.U(entries.W)) - val cfiType = Reg(Vec(entries, CFIType())) + val cfiType = Reg(Vec(entries, UInt(CFIType.width.W))) val brIdx = Reg(Vec(entries, UInt(log2Up(fetchWidth).W))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) - pageValid & pages.map(_ === p).asUInt + pageValid & VecInit(pages.map(_ === p)).asUInt } private def idxMatch(addr: UInt) = { val idx = addr(matchBits - 1, log2Up(coreInstBytes)) - idxs.map(_ === idx).asUInt & isValid + VecInit(idxs.map(_ === idx)).asUInt & isValid } val r_btb_update = Pipe(io.btb_update) @@ -285,7 +205,10 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages)) io.resp.bits.taken := true.B - io.resp.bits.target := Cat(pagesMasked(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes)) + io.resp.bits.target := Cat( + VecInit(pagesMasked)(Mux1H(idxHit, tgtPages)), + Mux1H(idxHit, tgts) << log2Up(coreInstBytes) + ) io.resp.bits.entry := OHToUInt(idxHit) io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else 0.U) io.resp.bits.mask := Cat((1.U << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, 0.U)) - 1.U, 1.U) @@ -299,12 +222,74 @@ class BTB(implicit p: Parameters) extends BtbModule { isValid := 0.U } - if (btbParams.bhtParams.nonEmpty) { - val bht = new BHT(Annotated.params(this, btbParams.bhtParams.get)) - val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR + parameter.bhtParameter.foreach { bhtParameter => + /** BHT contains table of 2-bit counters and a global history register. + * The BHT only predicts and updates when there is a BTB hit. 
+ * The global history: + * - updated speculatively in fetch (if there's a BTB hit). + * - on a mispredict, the history register is reset (again, only if BTB hit). + * The counter table: + * - each counter corresponds with the address of the fetch packet ("fetch pc"). + * - updated when a branch resolves (and BTB was a hit for that branch). + * The updating branch must provide its "fetch pc". + */ + class BHT { + def index(addr: UInt, history: UInt) = { + def hashHistory(hist: UInt) = if (bhtParameter.historyLength == bhtParameter.historyBits) hist + else { + val k = math.sqrt(3) / 2 + val i = BigDecimal(k * math.pow(2, bhtParameter.historyLength)).toBigInt + (i.U * hist)(bhtParameter.historyLength - 1, bhtParameter.historyLength - bhtParameter.historyBits) + } + def hashAddr(addr: UInt) = { + val hi = addr >> log2Ceil(parameter.fetchBytes) + hi(log2Ceil(bhtParameter.nEntries) - 1, 0) ^ (hi >> log2Ceil(bhtParameter.nEntries))(1, 0) + } + hashAddr(addr) ^ (hashHistory(history) << (log2Up(bhtParameter.nEntries) - bhtParameter.historyBits)) + } + def get(addr: UInt): BHTResp = { + val res = Wire(new BHTResp(Some(bhtParameter.historyLength), Some(bhtParameter.counterLength))) + res.value := Mux(resetting, 0.U, table(index(addr, history))) + res.history := history + res + } + def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { + wen := true.B + when(!resetting) { + waddr := index(addr, d.history) + wdata := (bhtParameter.counterLength match { + case 1 => taken + case 2 => Cat(taken ^ d.value(0), d.value === 1.U || d.value(1) && taken) + }) + } + } + def resetHistory(d: BHTResp): Unit = { + history := d.history + } + def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { + history := Cat(taken, d.history >> 1) + } + def advanceHistory(taken: Bool): Unit = { + history := Cat(taken, history >> 1) + } + + // todo: make sure if this is SRAM, then change it to SRAM. 
+ private val table = Mem(bhtParameter.nEntries, UInt(bhtParameter.counterLength.W)) + val history = RegInit(0.U(bhtParameter.historyLength.W)) + + private val reset_waddr = RegInit(0.U((log2Ceil(bhtParameter.nEntries) + 1).W)) + private val resetting = !reset_waddr(log2Ceil(bhtParameter.nEntries)) + private val wen = WireInit(resetting) + private val waddr = WireInit(reset_waddr) + private val wdata = WireInit(0.U) + when(resetting) { reset_waddr := reset_waddr + 1.U } + when(wen) { table(waddr) := wdata } + } + val bht = new BHT + val isBranch = (idxHit & VecInit(cfiType.map(_ === CFIType.branch)).asUInt).orR val res = bht.get(io.req.bits.addr) when(io.bht_advance.valid) { - bht.advanceHistory(io.bht_advance.bits.bht.taken) + bht.advanceHistory(BHTResp.taken(io.bht_advance.bits.bht)) } when(io.bht_update.valid) { when(io.bht_update.bits.branch) { @@ -316,13 +301,32 @@ class BTB(implicit p: Parameters) extends BtbModule { bht.resetHistory(io.bht_update.bits.prediction) } } - when(!res.taken && isBranch) { io.resp.bits.taken := false.B } + when(!BHTResp.taken(res) && isBranch) { io.resp.bits.taken := false.B } io.resp.bits.bht := res } - if (btbParams.nRAS > 0) { - val ras = new RAS(btbParams.nRAS) - val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR + if (parameter.nRAS > 0) { + class RAS { + def push(addr: UInt): Unit = { + when(count < parameter.nRAS.U) { count := count + 1.U } + val nextPos = Mux(isPow2(parameter.nRAS).B || pos < (parameter.nRAS - 1).U, pos + 1.U, 0.U) + stack(nextPos) := addr + pos := nextPos + } + def peek: UInt = stack(pos) + def pop(): Unit = when(!isEmpty) { + count := count - 1.U + pos := Mux((isPow2(parameter.nRAS)).B || pos > 0.U, pos - 1.U, (parameter.nRAS - 1).U) + } + def clear(): Unit = count := 0.U + def isEmpty: Bool = count === 0.U + + private val count = RegInit(0.U(log2Up(parameter.nRAS + 1).W)) + private val pos = RegInit(0.U(log2Up(parameter.nRAS).W)) + private val stack = Reg(Vec(parameter.nRAS, UInt())) + } + 
val ras = new RAS + val doPeek = (idxHit & VecInit(cfiType.map(_ === CFIType.ret)).asUInt).orR io.ras_head.valid := !ras.isEmpty io.ras_head.bits := ras.peek when(!ras.isEmpty && doPeek) { diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index c1724c2e9..1428b5982 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -5,23 +5,21 @@ package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.Cat +import chisel3.util.{Cat, log2Ceil} // This file defines Bundle shared in the project. // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. -object MStatus { - object PRV { - val SZ = 2 - val U = 0 - val S = 1 - val H = 2 - val M = 3 - } +// TODO: make it Enum +object PRV { + val SZ = 2 + val U = 0 + val S = 1 + val H = 2 + val M = 3 } class MStatus extends Bundle { - import MStatus._ // not truly part of mstatus, but convenient val debug = Bool() val cease = Bool() @@ -67,12 +65,11 @@ class MStatus extends Bundle { object BP { def contextMatch(bp: BP, mcontext: UInt, scontext: UInt, xLen: Int, mcontextWidth: Int, scontextWidth: Int): Bool = (if (mcontextWidth > 0) - !bp.textra.mselect || (mcontext(TExtra.mvalueBits(xLen, mcontextWidth) - 1, 0) === bp.textra.mvalue) - else true.B) && + !bp.textra.mselect || (mcontext(TExtra.mvalueBits(xLen, mcontextWidth) - 1, 0) === bp.textra.mvalue) + else true.B) && (if (scontextWidth > 0) - !bp.textra.sselect || (scontext(TExtra.svalueBits(xLen, scontextWidth) - 1, 0) === bp.textra.svalue) - else true.B - ) + !bp.textra.sselect || (scontext(TExtra.svalueBits(xLen, scontextWidth) - 1, 0) === bp.textra.svalue) + else true.B) def addressMatch(bp: BP, x: UInt) = { def rangeAddressMatch(x: UInt) = @@ -97,7 +94,8 @@ class BP(xLen: Int, useBPWatch: Boolean, vaddrBits: Int, mcontextWidth: Int, sco } object BPControl { - def enabled(bpControl: BPControl, mstatus: MStatus): Bool = !mstatus.debug && Cat(bpControl.m, bpControl.h, bpControl.s, 
bpControl.u)(mstatus.prv) + def enabled(bpControl: BPControl, mstatus: MStatus): Bool = + !mstatus.debug && Cat(bpControl.m, bpControl.h, bpControl.s, bpControl.u)(mstatus.prv) } class BPControl(xLen: Int, useBPWatch: Boolean) extends Bundle { @@ -145,4 +143,79 @@ class BPWatch extends Bundle() { val wvalid = Bool() val ivalid = Bool() val action = UInt(3.W) -} \ No newline at end of file +} + +class BTBReq(vaddrBits: Int) extends Bundle { + val addr = UInt(vaddrBits.W) +} + +class BTBResp( + vaddrBits: Int, + entries: Int, + fetchWidth: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int]) + extends Bundle { + + val cfiType = UInt(CFIType.width.W) + val taken = Bool() + val mask = UInt(fetchWidth.W) + val bridx = UInt(log2Ceil(fetchWidth).W) + val target = UInt(vaddrBits.W) + val entry = UInt(log2Ceil(entries + 1).W) + // @todo make it optional with bhtHistoryLength and bhtCounterLength + val bht = new BHTResp(bhtHistoryLength, bhtCounterLength) +} + +object BHTResp { + def taken(bht: BHTResp): Bool = bht.value(0) +} + +class BHTResp(bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) extends Bundle { + val history = UInt(bhtHistoryLength.getOrElse(1).W) + val value = UInt(bhtCounterLength.getOrElse(1).W) + + // @todo: change to: + // val history = bhtHistoryLength.map(i => UInt(i.W)) + // val value = bhtCounterLength.map(i => UInt(i.W)) +} + +class BTBUpdate( + vaddrBits: Int, + entries: Int, + fetchWidth: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int]) + extends Bundle { + def fetchWidth: Int = 1 + + val prediction = new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength) + val pc = UInt(vaddrBits.W) + val target = UInt(vaddrBits.W) + val taken = Bool() + val isValid = Bool() + val br_pc = UInt(vaddrBits.W) + val cfiType = UInt(CFIType.width.W) +} + +class BHTUpdate(bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], vaddrBits: Int) extends Bundle { + val prediction = new 
BHTResp(bhtHistoryLength, bhtCounterLength) + val pc = UInt(vaddrBits.W) + val branch = Bool() + val taken = Bool() + val mispredict = Bool() +} + +class RASUpdate(vaddrBits: Int) extends Bundle { + val cfiType = UInt(CFIType.width.W) + val returnAddr = UInt(vaddrBits.W) +} + +// TODO: make it Enum +object CFIType { + def width = 2 + def branch = 0.U + def jump = 1.U + def call = 2.U + def ret = 3.U +} From 16f87ad710e638de6671b84cbb2d8ab09069da86 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 16:30:02 +0800 Subject: [PATCH 036/140] [rocketv] add elaborator for BTB - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.BTB config --useAsyncReset true --fetchBytes 16 --vaddrBits 34 --entries 28 --nMatchBits 14 --nPages 6 --nRAS 6 --cacheBlockBytes 64 --iCacheSet 64 --useCompressed true --updatesOutOfOrder false --bht-nEntries 512 --bht-counterLength 1 --bht-historyLength 8 --bht-historyBits 3 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.BTB design --parameter ./BTB.json --run-firtool --- elaborator/src/rocketv/BTB.scala | 79 ++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 elaborator/src/rocketv/BTB.scala diff --git a/elaborator/src/rocketv/BTB.scala b/elaborator/src/rocketv/BTB.scala new file mode 100644 index 000000000..859e842e3 --- /dev/null +++ b/elaborator/src/rocketv/BTB.scala @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, BTB, BTBParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object BTB extends Elaborator { + @main + case class BHTParameterMain( + @arg(name = "nEntries") nEntries: Int, + @arg(name = "counterLength") counterLength: Int, + @arg(name = "historyLength") historyLength: Int, + @arg(name = "historyBits") historyBits: Int) { + def 
convert: BHTParameter = BHTParameter( + nEntries, + counterLength, + historyLength, + historyBits + ) + } + implicit def BHTParameterMainParser: ParserForClass[BHTParameterMain] = ParserForClass[BHTParameterMain] + + @main + case class BTBParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "fetchBytes") fetchBytes: Int, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "entries") entries: Int, + @arg(name = "nMatchBits") nMatchBits: Int, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "iCacheSet") iCacheSet: Int, + @arg(name = "useCompressed") useCompressed: Boolean, + @arg(name = "updatesOutOfOrder") updatesOutOfOrder: Boolean, + @arg(name = "bht-nEntries") nEntries: Option[Int], + @arg(name = "bht-counterLength") counterLength: Option[Int], + @arg(name = "bht-historyLength") historyLength: Option[Int], + @arg(name = "bht-historyBits") historyBits: Option[Int], + @arg(name = "fetchWidth") fetchWidth: Int, + ) { + def convert: BTBParameter = BTBParameter( + useAsyncReset, + fetchBytes, + vaddrBits, + entries, + nMatchBits, + nPages, + nRAS, + cacheBlockBytes, + iCacheSet, + useCompressed, + updatesOutOfOrder, + fetchWidth, + (nEntries + .lazyZip(counterLength) + .lazyZip(historyLength) + .lazyZip(historyBits)) + .map { + case (nEntries, counterLength, historyLength, historyBits) => + BHTParameter(nEntries, counterLength, historyLength, historyBits) + } + .headOption + ) + } + + implicit def BTBParameterMainParser: ParserForClass[BTBParameterMain] = ParserForClass[BTBParameterMain] + + @main + def config(@arg(name = "parameter") parameter: BTBParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[BTB, BTBParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = 
ParserForMethods(this).runOrExit(args) +} From 16b6bf5b87afe94d2deec3045f68aeec23a36b08 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 27 Jun 2024 17:04:12 +0800 Subject: [PATCH 037/140] [rocketv] copy CSR into rocketv project --- rocketv/src/CSR.scala | 1615 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1615 insertions(+) create mode 100644 rocketv/src/CSR.scala diff --git a/rocketv/src/CSR.scala b/rocketv/src/CSR.scala new file mode 100644 index 000000000..780968357 --- /dev/null +++ b/rocketv/src/CSR.scala @@ -0,0 +1,1615 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{log2Ceil, log2Up, BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid} +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.devices.debug.DebugModuleKey +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property + +import scala.collection.mutable.LinkedHashMap +// TODO: remove these +import freechips.rocketchip.rocket.{CSRs, Causes, CustomCSRs, DecodeLogic} +import freechips.rocketchip.rocket.Instructions._ +import freechips.rocketchip.rocket.CustomInstructions._ + +class MStatus extends Bundle { + // not truly part of mstatus, but convenient + val debug = Bool() + val cease = Bool() + val wfi = Bool() + val isa = UInt(32.W) + + val dprv = UInt(PRV.SZ.W) // effective prv for data accesses + val dv = Bool() // effective v for data accesses + val prv = UInt(PRV.SZ.W) + val v = Bool() + + val sd = Bool() + val zero2 = UInt(23.W) + val mpv = Bool() + val gva = Bool() + val mbe = Bool() + val sbe = Bool() + val sxl = UInt(2.W) + val uxl = UInt(2.W) + val sd_rv32 = Bool() + val zero1 = UInt(8.W) + val tsr = Bool() + val tw = Bool() + val tvm = Bool() + val mxr = Bool() + val sum = Bool() + val mprv = Bool() + val xs = UInt(2.W) + val fs = UInt(2.W) + val 
mpp = UInt(2.W) + val vs = UInt(2.W) + val spp = UInt(1.W) + val mpie = Bool() + val ube = Bool() + val spie = Bool() + val upie = Bool() + val mie = Bool() + val hie = Bool() + val sie = Bool() + val uie = Bool() +} + +class MNStatus extends Bundle { + val mpp = UInt(2.W) + val zero3 = UInt(3.W) + val mpv = Bool() + val zero2 = UInt(3.W) + val mie = Bool() + val zero1 = UInt(3.W) +} + +class HStatus extends Bundle { + val zero6 = UInt(30.W) + val vsxl = UInt(2.W) + val zero5 = UInt(9.W) + val vtsr = Bool() + val vtw = Bool() + val vtvm = Bool() + val zero3 = UInt(2.W) + val vgein = UInt(6.W) + val zero2 = UInt(2.W) + val hu = Bool() + val spvp = Bool() + val spv = Bool() + val gva = Bool() + val vsbe = Bool() + val zero1 = UInt(5.W) +} + +class DCSR extends Bundle { + val xdebugver = UInt(2.W) + val zero4 = UInt(2.W) + val zero3 = UInt(12.W) + val ebreakm = Bool() + val ebreakh = Bool() + val ebreaks = Bool() + val ebreaku = Bool() + val zero2 = Bool() + val stopcycle = Bool() + val stoptime = Bool() + val cause = UInt(3.W) + val v = Bool() + val zero1 = UInt(2.W) + val step = Bool() + val prv = UInt(PRV.SZ.W) +} + +class MIP(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { + val lip = Vec(coreParams.nLocalInterrupts, Bool()) + val zero1 = Bool() + val debug = Bool() // keep in sync with CSR.debugIntCause + val sgeip = Bool() + val meip = Bool() + val vseip = Bool() + val seip = Bool() + val ueip = Bool() + val mtip = Bool() + val vstip = Bool() + val stip = Bool() + val utip = Bool() + val msip = Bool() + val vssip = Bool() + val ssip = Bool() + val usip = Bool() +} + +class Envcfg extends Bundle { + val stce = Bool() // only for menvcfg/henvcfg + val pbmte = Bool() // only for menvcfg/henvcfg + val zero54 = UInt(54.W) + val cbze = Bool() + val cbcfe = Bool() + val cbie = UInt(2.W) + val zero3 = UInt(3.W) + val fiom = Bool() + def write(wdata: UInt) { + val new_envcfg = wdata.asTypeOf(new Envcfg) + fiom := new_envcfg.fiom // only FIOM is 
writable currently + } +} + +class PTBR(implicit p: Parameters) extends CoreBundle()(p) { + def additionalPgLevels = mode.extract(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0) + def pgLevelsToMode(i: Int) = (xLen, i) match { + case (32, 2) => 1 + case (64, x) if x >= 3 && x <= 6 => x + 5 + } + val (modeBits, maxASIdBits) = xLen match { + case 32 => (1, 9) + case 64 => (4, 16) + } + require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) + + val mode = UInt(modeBits.W) + val asid = UInt(maxASIdBits.W) + val ppn = UInt((maxPAddrBits - pgIdxBits).W) +} + +object PRV { + val SZ = 2 + val U = 0 + val S = 1 + val H = 2 + val M = 3 +} + +object CSR { + // commands + val SZ = 3 + def X = BitPat.dontCare(SZ) + def N = 0.U(SZ.W) + def R = 2.U(SZ.W) + def I = 4.U(SZ.W) + def W = 5.U(SZ.W) + def S = 6.U(SZ.W) + def C = 7.U(SZ.W) + + // mask a CSR cmd with a valid bit + def maskCmd(valid: Bool, cmd: UInt): UInt = { + // all commands less than CSR.I are treated by CSRFile as NOPs + cmd & ~Mux(valid, 0.U, CSR.I) + } + + val ADDRSZ = 12 + + def modeLSB: Int = 8 + def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) + def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) + + def busErrorIntCause = 128 + def debugIntCause = 14 // keep in sync with MIP.debug + def debugTriggerCause = { + val res = debugIntCause + require(!(Causes.all contains res)) + res + } + def rnmiIntCause = 13 // NMI: Higher numbers = higher priority, must not reuse debugIntCause + def rnmiBEUCause = 12 + + val firstCtr = CSRs.cycle + val firstCtrH = CSRs.cycleh + val firstHPC = CSRs.hpmcounter3 + val firstHPCH = CSRs.hpmcounter3h + val firstHPE = CSRs.mhpmevent3 + val firstMHPC = CSRs.mhpmcounter3 + val firstMHPCH = CSRs.mhpmcounter3h + val firstHPM = 3 + val nCtr = 32 + val nHPM = nCtr - firstHPM + val hpmWidth = 40 + + val maxPMPs = 16 +} + +class PerfCounterIO(implicit p: Parameters) extends CoreBundle with HasCoreParameters { + val eventSel = Output(UInt(xLen.W)) + val inc = 
Input(UInt(log2Ceil(1 + retireWidth).W)) +} + +// CSR Interface with decode stage, basically check illegal +class CSRDecodeIO(implicit p: Parameters) extends CoreBundle { + val inst = Input(UInt(iLen.W)) + def csrAddr = (inst >> 20)(CSR.ADDRSZ - 1, 0) + val fpIllegal = Output(Bool()) + val fpCsr = Output(Bool()) + val readIllegal = Output(Bool()) + val writeIllegal = Output(Bool()) + val writeFlush = Output(Bool()) + val systemIllegal = Output(Bool()) + val virtualAccessIllegal = Output(Bool()) + val virtualSystemIllegal = Output(Bool()) +} + +class CSRFileIO(hasBeu: Boolean)(implicit p: Parameters) extends CoreBundle with HasCoreParameters { + val ungatedClock = Input(Clock()) + val interrupts = Input(new CoreInterrupts(hasBeu)) + val hartid = Input(UInt(hartIdLen.W)) + val rw = new Bundle { + val addr = Input(UInt(CSR.ADDRSZ.W)) + val cmd = Input(Bits(CSR.SZ.W)) + val rdata = Output(Bits(xLen.W)) + val wdata = Input(Bits(xLen.W)) + } + + val decode = Vec(decodeWidth, new CSRDecodeIO) + + val csrStall = Output(Bool()) // stall retire for wfi + val rwStall = Output(Bool()) // stall rw, rw will have no effect while rw_stall + val eret = Output(Bool()) + val singleStep = Output(Bool()) + + val status = Output(new MStatus()) + val hstatus = Output(new HStatus()) + val gstatus = Output(new MStatus()) + val ptbr = Output(new PTBR()) + val hgatp = Output(new PTBR()) + val vsatp = Output(new PTBR()) + val evec = Output(UInt(vaddrBitsExtended.W)) + val exception = Input(Bool()) + val retire = Input(UInt(log2Up(1 + retireWidth).W)) + val cause = Input(UInt(xLen.W)) + val pc = Input(UInt(vaddrBitsExtended.W)) + val tval = Input(UInt(vaddrBitsExtended.W)) + val htval = Input(UInt(((maxSVAddrBits + 1).min(xLen)).W)) + val gva = Input(Bool()) + val time = Output(UInt(xLen.W)) + val fcsrRm = Output(Bits(FPConstants.RM_SZ.W)) + val fcsrFlags = Flipped(Valid(Bits(FPConstants.FLAGS_SZ.W))) + val setFsDirty = coreParams.haveFSDirty.option(Input(Bool())) + val interrupt = 
Output(Bool()) + val interruptCause = Output(UInt(xLen.W)) + val bp = Output(Vec(nBreakpoints, new BP)) + val pmp = Output(Vec(nPMPs, new PMP)) + val counters = Vec(nPerfCounters, new PerfCounterIO) + val csrwCounter = Output(UInt(CSR.nCtr.W)) + val inhibitCycle = Output(Bool()) + val inst = Input(Vec(retireWidth, UInt(iLen.W))) + val mcontext = Output(UInt(coreParams.mcontextWidth.W)) + val scontext = Output(UInt(coreParams.scontextWidth.W)) + val fiom = Output(Bool()) + val vectorCsr = Option.when(usingVector)(Input(Bool())) + val wbRegRS2 = Option.when(usingVector)(Input(UInt())) +} + +/** + * https://github.com/riscv/riscv-isa-manual/blob/main/src/zicsr.adoc + */ +class CSRFile( + perfEventSets: EventSets = new EventSets(Seq()), + customCSRs: Seq[CustomCSR] = Nil, + hasBeu: Boolean +)( + implicit p: Parameters) + extends CoreModule()(p) + with HasCoreParameters { + val vector = Option.when(usingVector)(new csr.V(vLen, usingHypervisor)) + + val io = IO(new CSRFileIO(hasBeu) { + val customCSRs = Vec(CSRFile.this.customCSRs.size, new CustomCSRIO) + }) + + io.rwStall := false.B + + val reset_mstatus = WireDefault(0.U.asTypeOf(new MStatus())) + reset_mstatus.mpp := PRV.M.U + reset_mstatus.prv := PRV.M.U + reset_mstatus.xs := 0.U + val reg_mstatus = RegInit(reset_mstatus) + + val new_prv = WireDefault(reg_mstatus.prv) + reg_mstatus.prv := legalizePrivilege(new_prv) + + val reset_dcsr = WireDefault(0.U.asTypeOf(new DCSR())) + reset_dcsr.xdebugver := 1.U + reset_dcsr.prv := PRV.M.U + val reg_dcsr = RegInit(reset_dcsr) + + val (supported_interrupts, delegable_interrupts) = { + val sup = Wire(new MIP) + sup.usip := false.B + sup.ssip := usingSupervisor.B + sup.vssip := usingHypervisor.B + sup.msip := true.B + sup.utip := false.B + sup.stip := usingSupervisor.B + sup.vstip := usingHypervisor.B + sup.mtip := true.B + sup.ueip := false.B + sup.seip := usingSupervisor.B + sup.vseip := usingHypervisor.B + sup.meip := true.B + sup.sgeip := false.B + sup.debug := false.B + 
sup.zero1 := false.B + sup.lip.foreach { _ := true.B } + val supported_high_interrupts = + if (io.interrupts.buserror.nonEmpty && !usingNMI) (BigInt(1) << CSR.busErrorIntCause).U else 0.U + + val del = WireDefault(sup) + del.msip := false.B + del.mtip := false.B + del.meip := false.B + + (sup.asUInt | supported_high_interrupts, del.asUInt) + } + val delegable_base_exceptions = Seq( + Causes.misaligned_fetch, + Causes.fetch_page_fault, + Causes.breakpoint, + Causes.load_page_fault, + Causes.store_page_fault, + Causes.misaligned_load, + Causes.misaligned_store, + Causes.illegal_instruction, + Causes.user_ecall + ) + val delegable_hypervisor_exceptions = Seq( + Causes.virtual_supervisor_ecall, + Causes.fetch_guest_page_fault, + Causes.load_guest_page_fault, + Causes.virtual_instruction, + Causes.store_guest_page_fault + ) + val delegable_exceptions = ( + delegable_base_exceptions + ++ (if (usingHypervisor) delegable_hypervisor_exceptions else Seq()) + ).map(1 << _).sum.U + + val hs_delegable_exceptions = Seq( + Causes.misaligned_fetch, + Causes.fetch_access, + Causes.illegal_instruction, + Causes.breakpoint, + Causes.misaligned_load, + Causes.load_access, + Causes.misaligned_store, + Causes.store_access, + Causes.user_ecall, + Causes.fetch_page_fault, + Causes.load_page_fault, + Causes.store_page_fault + ).map(1 << _).sum.U + + val (hs_delegable_interrupts, mideleg_always_hs) = { + val always = WireDefault(0.U.asTypeOf(new MIP())) + always.vssip := usingHypervisor.B + always.vstip := usingHypervisor.B + always.vseip := usingHypervisor.B + + val deleg = WireDefault(always) + deleg.lip.foreach { _ := usingHypervisor.B } + + (deleg.asUInt, always.asUInt) + } + + val reg_debug = RegInit(false.B) + val reg_dpc = Reg(UInt(vaddrBitsExtended.W)) + val reg_dscratch0 = Reg(UInt(xLen.W)) + val reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W))) + val reg_singleStepped = Reg(Bool()) + + val reg_mcontext = (coreParams.mcontextWidth > 
0).option(RegInit(0.U(coreParams.mcontextWidth.W))) + val reg_scontext = (coreParams.scontextWidth > 0).option(RegInit(0.U(coreParams.scontextWidth.W))) + + val reg_tselect = Reg(UInt(log2Up(nBreakpoints).W)) + val reg_bp = Reg(Vec(1 << log2Up(nBreakpoints), new BP)) + val reg_pmp = Reg(Vec(nPMPs, new PMPReg)) + + val reg_mie = Reg(UInt(xLen.W)) + val (reg_mideleg, read_mideleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_interrupts | mideleg_always_hs, 0.U)) + } + val (reg_medeleg, read_medeleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_exceptions, 0.U)) + } + val reg_mip = Reg(new MIP) + val reg_mepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_mcause = RegInit(0.U(xLen.W)) + val reg_mtval = Reg(UInt(vaddrBitsExtended.W)) + val reg_mtval2 = Reg(UInt(((maxSVAddrBits + 1).min(xLen)).W)) + val reg_mscratch = Reg(Bits(xLen.W)) + val mtvecWidth = paddrBits.min(xLen) + val reg_mtvec = mtvecInit match { + case Some(addr) => RegInit(addr.U(mtvecWidth.W)) + case None => Reg(UInt(mtvecWidth.W)) + } + + val reset_mnstatus = WireDefault(0.U.asTypeOf(new MNStatus())) + reset_mnstatus.mpp := PRV.M.U + val reg_mnscratch = Reg(Bits(xLen.W)) + val reg_mnepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_mncause = RegInit(0.U(xLen.W)) + val reg_mnstatus = RegInit(reset_mnstatus) + val reg_rnmie = RegInit(true.B) + val nmie = reg_rnmie + + val reg_menvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + val reg_senvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + val reg_henvcfg = RegInit(0.U.asTypeOf(new Envcfg)) + + val delegable_counters = ((BigInt(1) << (nPerfCounters + CSR.firstHPM)) - 1).U + val (reg_mcounteren, read_mcounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingUser.B, reg & delegable_counters, 0.U)) + } + val (reg_scounteren, read_scounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingSupervisor.B, reg & delegable_counters, 0.U)) + } + + val (reg_hideleg, read_hideleg) = { + val reg = Reg(UInt(xLen.W)) 
+ (reg, Mux(usingHypervisor.B, reg & hs_delegable_interrupts, 0.U)) + } + val (reg_hedeleg, read_hedeleg) = { + val reg = Reg(UInt(xLen.W)) + (reg, Mux(usingHypervisor.B, reg & hs_delegable_exceptions, 0.U)) + } + val hs_delegable_counters = delegable_counters + val (reg_hcounteren, read_hcounteren) = { + val reg = Reg(UInt(32.W)) + (reg, Mux(usingHypervisor.B, reg & hs_delegable_counters, 0.U)) + } + val reg_hstatus = RegInit(0.U.asTypeOf(new HStatus)) + val reg_hgatp = Reg(new PTBR) + val reg_htval = Reg(reg_mtval2.cloneType) + val read_hvip = reg_mip.asUInt & hs_delegable_interrupts + val read_hie = reg_mie & hs_delegable_interrupts + + val (reg_vstvec, read_vstvec) = { + val reg = Reg(UInt(vaddrBitsExtended.W)) + (reg, formTVec(reg).sextTo(xLen)) + } + val reg_vsstatus = Reg(new MStatus) + val reg_vsscratch = Reg(Bits(xLen.W)) + val reg_vsepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_vscause = Reg(Bits(xLen.W)) + val reg_vstval = Reg(UInt(vaddrBitsExtended.W)) + val reg_vsatp = Reg(new PTBR) + + val reg_sepc = Reg(UInt(vaddrBitsExtended.W)) + val reg_scause = Reg(Bits(xLen.W)) + val reg_stval = Reg(UInt(vaddrBitsExtended.W)) + val reg_sscratch = Reg(Bits(xLen.W)) + val reg_stvec = Reg(UInt((if (usingHypervisor) vaddrBitsExtended else vaddrBits).W)) + val reg_satp = Reg(new PTBR) + val reg_wfi = withClock(io.ungatedClock) { RegInit(false.B) } + + val reg_fflags = Reg(UInt(5.W)) + val reg_frm = Reg(UInt(3.W)) + + val reg_mcountinhibit = RegInit(0.U((CSR.firstHPM + nPerfCounters).W)) + io.inhibitCycle := reg_mcountinhibit(0) + val reg_instret = WideCounter(64, io.retire, inhibit = reg_mcountinhibit(2)) + val reg_cycle = + if (enableCommitLog) WideCounter(64, io.retire, inhibit = reg_mcountinhibit(0)) + else withClock(io.ungatedClock) { WideCounter(64, !io.csrStall, inhibit = reg_mcountinhibit(0)) } + val reg_hpmevent = io.counters.map(c => RegInit(0.U(xLen.W))) + (io.counters.zip(reg_hpmevent)).foreach { case (c, e) => c.eventSel := e } + val reg_hpmcounter = 
io.counters.zipWithIndex.map { + case (c, i) => + WideCounter(CSR.hpmWidth, c.inc, reset = false, inhibit = reg_mcountinhibit(CSR.firstHPM + i)) + } + + val mip = WireDefault(reg_mip) + mip.lip := (io.interrupts.lip: Seq[Bool]) + mip.mtip := io.interrupts.mtip + mip.msip := io.interrupts.msip + mip.meip := io.interrupts.meip + // seip is the OR of reg_mip.seip and the actual line from the PLIC + io.interrupts.seip.foreach { mip.seip := reg_mip.seip || _ } + // Simimlar sort of thing would apply if the PLIC had a VSEIP line: + //io.interrupts.vseip.foreach { mip.vseip := reg_mip.vseip || _ } + val read_mip = mip.asUInt & supported_interrupts + val read_hip = read_mip & hs_delegable_interrupts + val high_interrupts = (if (usingNMI) 0.U else io.interrupts.buserror.map(_ << CSR.busErrorIntCause).getOrElse(0.U)) + + val pending_interrupts = high_interrupts | (read_mip & reg_mie) + val d_interrupts = io.interrupts.debug << CSR.debugIntCause + val (nmi_interrupts, nmiFlag) = io.interrupts.nmi + .map(nmi => + ( + ((nmi.rnmi && reg_rnmie) << CSR.rnmiIntCause) | + io.interrupts.buserror.map(_ << CSR.rnmiBEUCause).getOrElse(0.U), + !io.interrupts.debug && nmi.rnmi && reg_rnmie + ) + ) + .getOrElse(0.U, false.B) + val m_interrupts = + Mux(nmie && (reg_mstatus.prv <= PRV.S.U || reg_mstatus.mie), ~(~pending_interrupts | read_mideleg), 0.U) + val s_interrupts = Mux( + nmie && (reg_mstatus.v || reg_mstatus.prv < PRV.S.U || (reg_mstatus.prv === PRV.S.U && reg_mstatus.sie)), + pending_interrupts & read_mideleg & ~read_hideleg, + 0.U + ) + val vs_interrupts = Mux( + nmie && (reg_mstatus.v && (reg_mstatus.prv < PRV.S.U || reg_mstatus.prv === PRV.S.U && reg_vsstatus.sie)), + pending_interrupts & read_hideleg, + 0.U + ) + val (anyInterrupt, whichInterrupt) = chooseInterrupt( + Seq(vs_interrupts, s_interrupts, m_interrupts, nmi_interrupts, d_interrupts) + ) + val interruptMSB = BigInt(1) << (xLen - 1) + val interruptCause = interruptMSB.U + (nmiFlag << (xLen - 2)) + whichInterrupt + 
io.interrupt := (anyInterrupt && !io.singleStep || reg_singleStepped) && !(reg_debug || io.status.cease) + io.interruptCause := interruptCause + io.bp := reg_bp.take(nBreakpoints) + io.mcontext := reg_mcontext.getOrElse(0.U) + io.scontext := reg_scontext.getOrElse(0.U) + io.fiom := (reg_mstatus.prv < PRV.M.U && reg_menvcfg.fiom) || (reg_mstatus.prv < PRV.S.U && reg_senvcfg.fiom) || (reg_mstatus.v && reg_henvcfg.fiom) + io.pmp := reg_pmp.map(PMP(_)) + + val isaMaskString = + (if (usingMulDiv) "M" else "") + + (if (usingAtomics) "A" else "") + + (if (fLen >= 32) "F" else "") + + (if (fLen >= 64) "D" else "") + + (if (usingVector) "V" else "") + + (if (usingCompressed) "C" else "") + val isaString = (if (coreParams.useRVE) "E" else "I") + + isaMaskString + + (if (customIsaExt.isDefined) "X" else "") + + (if (usingSupervisor) "S" else "") + + (if (usingHypervisor) "H" else "") + + (if (usingUser) "U" else "") + val isaMax = (BigInt(log2Ceil(xLen) - 4) << (xLen - 2)) | isaStringToMask(isaString) + val reg_misa = RegInit(isaMax.U) + val read_mstatus = io.status.asUInt + val read_mtvec = formTVec(reg_mtvec).padTo(xLen) + val read_stvec = formTVec(reg_stvec).sextTo(xLen) + + val read_mapping = LinkedHashMap[Int, Bits]( + CSRs.tselect -> reg_tselect, + CSRs.tdata1 -> reg_bp(reg_tselect).control.asUInt, + CSRs.tdata2 -> reg_bp(reg_tselect).address.sextTo(xLen), + CSRs.tdata3 -> reg_bp(reg_tselect).textra.asUInt, + CSRs.misa -> reg_misa, + CSRs.mstatus -> read_mstatus, + CSRs.mtvec -> read_mtvec, + CSRs.mip -> read_mip, + CSRs.mie -> reg_mie, + CSRs.mscratch -> reg_mscratch, + CSRs.mepc -> readEPC(reg_mepc).sextTo(xLen), + CSRs.mtval -> reg_mtval.sextTo(xLen), + CSRs.mcause -> reg_mcause, + CSRs.mhartid -> io.hartid + ) + + val debug_csrs = + if (!usingDebug) LinkedHashMap() + else + LinkedHashMap[Int, Bits]( + CSRs.dcsr -> reg_dcsr.asUInt, + CSRs.dpc -> readEPC(reg_dpc).sextTo(xLen), + CSRs.dscratch0 -> reg_dscratch0.asUInt + ) ++ + reg_dscratch1.map(r => CSRs.dscratch1 -> 
r) + + val read_mnstatus = WireInit(0.U.asTypeOf(new MNStatus())) + read_mnstatus.mpp := reg_mnstatus.mpp + read_mnstatus.mpv := reg_mnstatus.mpv + read_mnstatus.mie := reg_rnmie + val nmi_csrs = + if (!usingNMI) LinkedHashMap() + else + LinkedHashMap[Int, Bits]( + CustomCSRs.mnscratch -> reg_mnscratch, + CustomCSRs.mnepc -> readEPC(reg_mnepc).sextTo(xLen), + CustomCSRs.mncause -> reg_mncause, + CustomCSRs.mnstatus -> read_mnstatus.asUInt + ) + + val context_csrs = LinkedHashMap[Int, Bits]() ++ + reg_mcontext.map(r => CSRs.mcontext -> r) ++ + reg_scontext.map(r => CSRs.scontext -> r) + + val read_fcsr = Cat(reg_frm, reg_fflags) + val fp_csrs = LinkedHashMap[Int, Bits]() ++ + usingFPU.option(CSRs.fflags -> reg_fflags) ++ + usingFPU.option(CSRs.frm -> reg_frm) ++ + usingFPU.option(CSRs.fcsr -> read_fcsr) + + read_mapping ++= debug_csrs + read_mapping ++= nmi_csrs + read_mapping ++= context_csrs + read_mapping ++= fp_csrs + + // Vector read CSR logic injection + vector.foreach { v => + read_mapping ++= LinkedHashMap[Int, Bits]( + CSRs.vxsat -> v.states("vxsat"), + CSRs.vxrm -> v.states("vxrm"), + CSRs.vcsr -> v.states("vxrm") ## v.states("vxsat"), + CSRs.vstart -> v.states("vstart"), + CSRs.vtype -> v.states("vlmul") ## v.states("vsew") ## v.states("vta") ## v.states("vma") ## 0.U(23.W) ## v + .states("vill"), + CSRs.vl -> v.states("vl"), + CSRs.vlenb -> v.constants("vlenb") + ) + } + + if (coreParams.haveBasicCounters) { + read_mapping += CSRs.mcountinhibit -> reg_mcountinhibit + read_mapping += CSRs.mcycle -> reg_cycle + read_mapping += CSRs.minstret -> reg_instret + + for ( + ((e, c), i) <- (reg_hpmevent + .padTo(CSR.nHPM, 0.U) + .zip(reg_hpmcounter.map(x => x: UInt).padTo(CSR.nHPM, 0.U))) + .zipWithIndex + ) { + read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN + read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN + read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN + if (xLen == 32) { + read_mapping += (i + CSR.firstMHPCH) -> (c >> 32) // 
mhpmcounterNh + read_mapping += (i + CSR.firstHPCH) -> (c >> 32) // hpmcounterNh + } + } + + if (usingUser) { + read_mapping += CSRs.mcounteren -> read_mcounteren + } + read_mapping += CSRs.cycle -> reg_cycle + read_mapping += CSRs.instret -> reg_instret + + if (xLen == 32) { + read_mapping += CSRs.mcycleh -> (reg_cycle >> 32) + read_mapping += CSRs.minstreth -> (reg_instret >> 32) + read_mapping += CSRs.cycleh -> (reg_cycle >> 32) + read_mapping += CSRs.instreth -> (reg_instret >> 32) + } + } + + if (usingUser) { + read_mapping += CSRs.menvcfg -> reg_menvcfg.asUInt + if (xLen == 32) + read_mapping += CSRs.menvcfgh -> (reg_menvcfg.asUInt >> 32) + } + + val sie_mask = { + val sgeip_mask = WireInit(0.U.asTypeOf(new MIP)) + sgeip_mask.sgeip := true.B + read_mideleg & ~(hs_delegable_interrupts | sgeip_mask.asUInt) + } + if (usingSupervisor) { + val read_sie = reg_mie & sie_mask + val read_sip = read_mip & sie_mask + val read_sstatus = WireDefault(0.U.asTypeOf(new MStatus)) + read_sstatus.sd := io.status.sd + read_sstatus.uxl := io.status.uxl + read_sstatus.sd_rv32 := io.status.sd_rv32 + read_sstatus.mxr := io.status.mxr + read_sstatus.sum := io.status.sum + read_sstatus.xs := io.status.xs + read_sstatus.fs := io.status.fs + read_sstatus.vs := io.status.vs + read_sstatus.spp := io.status.spp + read_sstatus.spie := io.status.spie + read_sstatus.sie := io.status.sie + + read_mapping += CSRs.sstatus -> (read_sstatus.asUInt)(xLen - 1, 0) + read_mapping += CSRs.sip -> read_sip.asUInt + read_mapping += CSRs.sie -> read_sie.asUInt + read_mapping += CSRs.sscratch -> reg_sscratch + read_mapping += CSRs.scause -> reg_scause + read_mapping += CSRs.stval -> reg_stval.sextTo(xLen) + read_mapping += CSRs.satp -> reg_satp.asUInt + read_mapping += CSRs.sepc -> readEPC(reg_sepc).sextTo(xLen) + read_mapping += CSRs.stvec -> read_stvec + read_mapping += CSRs.scounteren -> read_scounteren + read_mapping += CSRs.mideleg -> read_mideleg + read_mapping += CSRs.medeleg -> read_medeleg + 
read_mapping += CSRs.senvcfg -> reg_senvcfg.asUInt + } + + val pmpCfgPerCSR = xLen / new PMPConfig().getWidth + def pmpCfgIndex(i: Int) = (xLen / 32) * (i / pmpCfgPerCSR) + if (reg_pmp.nonEmpty) { + require(reg_pmp.size <= CSR.maxPMPs) + val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMP)) + for (i <- 0 until read_pmp.size by pmpCfgPerCSR) + read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).asUInt + for ((pmp, i) <- read_pmp.zipWithIndex) + read_mapping += (CSRs.pmpaddr0 + i) -> pmp.readAddr + } + + // implementation-defined CSRs + def generateCustomCSR(csr: CustomCSR, csr_io: CustomCSRIO) = { + require(csr.mask >= 0 && csr.mask.bitLength <= xLen) + require(!read_mapping.contains(csr.id)) + val reg = csr.init.map(init => RegInit(init.U(xLen.W))).getOrElse(Reg(UInt(xLen.W))) + val read = io.rw.cmd =/= CSR.N && io.rw.addr === csr.id.U + csr_io.ren := read + when(read && csr_io.stall) { io.rwStall := true.B } + read_mapping += csr.id -> reg + reg + } + val reg_custom = customCSRs.zip(io.customCSRs).map(t => generateCustomCSR(t._1, t._2)) + + if (usingHypervisor) { + read_mapping += CSRs.mtinst -> 0.U + read_mapping += CSRs.mtval2 -> reg_mtval2 + + val read_hstatus = io.hstatus.asUInt.extract(xLen - 1, 0) + + read_mapping += CSRs.hstatus -> read_hstatus + read_mapping += CSRs.hedeleg -> read_hedeleg + read_mapping += CSRs.hideleg -> read_hideleg + read_mapping += CSRs.hcounteren -> read_hcounteren + read_mapping += CSRs.hgatp -> reg_hgatp.asUInt + read_mapping += CSRs.hip -> read_hip + read_mapping += CSRs.hie -> read_hie + read_mapping += CSRs.hvip -> read_hvip + read_mapping += CSRs.hgeie -> 0.U + read_mapping += CSRs.hgeip -> 0.U + read_mapping += CSRs.htval -> reg_htval + read_mapping += CSRs.htinst -> 0.U + read_mapping += CSRs.henvcfg -> reg_henvcfg.asUInt + if (xLen == 32) + read_mapping += CSRs.henvcfgh -> (reg_henvcfg.asUInt >> 32) + + val read_vsie = (read_hie & read_hideleg) >> 1 + val read_vsip = 
(read_hip & read_hideleg) >> 1 + val read_vsepc = readEPC(reg_vsepc).sextTo(xLen) + val read_vstval = reg_vstval.sextTo(xLen) + val read_vsstatus = io.gstatus.asUInt.extract(xLen - 1, 0) + + read_mapping += CSRs.vsstatus -> read_vsstatus + read_mapping += CSRs.vsip -> read_vsip + read_mapping += CSRs.vsie -> read_vsie + read_mapping += CSRs.vsscratch -> reg_vsscratch + read_mapping += CSRs.vscause -> reg_vscause + read_mapping += CSRs.vstval -> read_vstval + read_mapping += CSRs.vsatp -> reg_vsatp.asUInt + read_mapping += CSRs.vsepc -> read_vsepc + read_mapping += CSRs.vstvec -> read_vstvec + } + + // mimpid, marchid, mvendorid, and mconfigptr are 0 unless overridden by customCSRs + Seq(CSRs.mimpid, CSRs.marchid, CSRs.mvendorid, CSRs.mconfigptr).foreach(id => read_mapping.getOrElseUpdate(id, 0.U)) + + val decoded_addr = { + val addr = Cat(io.status.v, io.rw.addr) + val pats = + for (((k, _), i) <- read_mapping.zipWithIndex) + yield (BitPat(k.U), (0 until read_mapping.size).map(j => BitPat((i == j).B))) + val decoded = DecodeLogic(addr, Seq.fill(read_mapping.size)(X), pats) + val unvirtualized_mapping = (for (((k, _), v) <- read_mapping.zip(decoded)) yield k -> v.asBool).toMap + + for ((k, v) <- unvirtualized_mapping) yield k -> { + val alt = CSR.mode(k) match { + case PRV.S => unvirtualized_mapping.lift(k + (1 << CSR.modeLSB)) + case PRV.H => unvirtualized_mapping.lift(k - (1 << CSR.modeLSB)) + case _ => None + } + alt.map(Mux(reg_mstatus.v, _, v)).getOrElse(v) + } + } + + val wdata = readModifyWriteCSR(io.rw.cmd, io.rw.rdata, io.rw.wdata) + + val system_insn = io.rw.cmd === CSR.I + val hlsv = Seq(HLV_B, HLV_BU, HLV_H, HLV_HU, HLV_W, HLV_WU, HLV_D, HSV_B, HSV_H, HSV_W, HSV_D, HLVX_HU, HLVX_WU) + val decode_table = Seq( + ECALL -> List(Y, N, N, N, N, N, N, N, N), + EBREAK -> List(N, Y, N, N, N, N, N, N, N), + MRET -> List(N, N, Y, N, N, N, N, N, N), + CEASE -> List(N, N, N, Y, N, N, N, N, N), + WFI -> List(N, N, N, N, Y, N, N, N, N) + ) ++ + usingDebug.option(DRET 
-> List(N, N, Y, N, N, N, N, N, N)) ++ + usingNMI.option(MNRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + coreParams.haveCFlush.option(CFLUSH_D_L1 -> List(N, N, N, N, N, N, N, N, N)) ++ + usingSupervisor.option(SRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + usingVM.option(SFENCE_VMA -> List(N, N, N, N, N, Y, N, N, N)) ++ + usingHypervisor.option(HFENCE_VVMA -> List(N, N, N, N, N, N, Y, N, N)) ++ + usingHypervisor.option(HFENCE_GVMA -> List(N, N, N, N, N, N, N, Y, N)) ++ + (if (usingHypervisor) hlsv.map(_ -> List(N, N, N, N, N, N, N, N, Y)) else Seq()) + val insn_call :: insn_break :: insn_ret :: insn_cease :: insn_wfi :: _ :: _ :: _ :: _ :: Nil = { + val insn = ECALL.value.U | (io.rw.addr << 20) + DecodeLogic(insn, decode_table(0)._2.map(x => X), decode_table).map(system_insn && _.asBool) + } + + for (io_dec <- io.decode) { + val addr = io_dec.inst(31, 20) + + def decodeAny(m: LinkedHashMap[Int, Bits]): Bool = m.map { case (k: Int, _: Bits) => addr === k.U }.reduce(_ || _) + def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U)) + + val _ :: is_break :: is_ret :: _ :: is_wfi :: is_sfence :: is_hfence_vvma :: is_hfence_gvma :: is_hlsv :: Nil = + DecodeLogic(io_dec.inst, decode_table(0)._2.map(x => X), decode_table).map(_.asBool) + val is_counter = (addr.inRange(CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) || addr.inRange( + CSR.firstCtrH.U, + (CSR.firstCtrH + CSR.nCtr).U + )) + + val allow_wfi = + (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !reg_mstatus.tw && (!reg_mstatus.v || !reg_hstatus.vtw) + val allow_sfence_vma = + (!usingVM).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtvm, reg_mstatus.tvm) + val allow_hfence_vvma = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U) + val allow_hlsv = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U || reg_hstatus.hu) + val allow_sret = + (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || 
!Mux(reg_mstatus.v, reg_hstatus.vtsr, reg_mstatus.tsr) + val counter_addr = addr(log2Ceil(read_mcounteren.getWidth) - 1, 0) + val allow_counter = (reg_mstatus.prv > PRV.S.U || read_mcounteren(counter_addr)) && + (!usingSupervisor.B || reg_mstatus.prv >= PRV.S.U || read_scounteren(counter_addr)) && + (!usingHypervisor.B || !reg_mstatus.v || read_hcounteren(counter_addr)) + io_dec.fpIllegal := io.status.fs === 0.U || reg_mstatus.v && reg_vsstatus.fs === 0.U || !reg_misa('f' - 'a') + io_dec.fpCsr := decodeFast(fp_csrs.keys.toList) + val csr_addr_legal = reg_mstatus.prv >= CSR.mode(addr) || + usingHypervisor.B && !reg_mstatus.v && reg_mstatus.prv === PRV.S.U && CSR.mode(addr) === PRV.H.U + val csr_exists = decodeAny(read_mapping) + io_dec.readIllegal := !csr_addr_legal || + !csr_exists || + ((addr === CSRs.satp.U || addr === CSRs.hgatp.U) && !allow_sfence_vma) || + is_counter && !allow_counter || + decodeFast(debug_csrs.keys.toList) && !reg_debug || + io_dec.fpCsr && io_dec.fpIllegal || + // vector read CSR illegal: if address is in the vector CSR, + vector + .map(vector => + decodeFast(Seq(CSRs.vxsat, CSRs.vxrm, CSRs.vcsr, CSRs.vstart, CSRs.vtype, CSRs.vl, CSRs.vlenb)) && + vector.states("mstatus.VS") === 0.U && + !reg_misa('v' - 'a') + ) + .getOrElse(false.B) + io_dec.writeIllegal := addr(11, 10).andR + io_dec.writeFlush := { + val addr_m = addr | (PRV.M.U << CSR.modeLSB) + !(addr_m >= CSRs.mscratch.U && addr_m <= CSRs.mtval.U) + } + io_dec.systemIllegal := !csr_addr_legal && !is_hlsv || + is_wfi && !allow_wfi || + is_ret && !allow_sret || + is_ret && addr(10) && addr(7) && !reg_debug || + (is_sfence || is_hfence_gvma) && !allow_sfence_vma || + is_hfence_vvma && !allow_hfence_vvma || + is_hlsv && !allow_hlsv + + io_dec.virtualAccessIllegal := reg_mstatus.v && csr_exists && (CSR.mode(addr) === PRV.H.U || + is_counter && read_mcounteren(counter_addr) && (!read_hcounteren(counter_addr) || !reg_mstatus.prv( + 0 + ) && !read_scounteren(counter_addr)) || + CSR.mode(addr) 
=== PRV.S.U && !reg_mstatus.prv(0) || + addr === CSRs.satp.U && reg_mstatus.prv(0) && reg_hstatus.vtvm) + + io_dec.virtualSystemIllegal := reg_mstatus.v && (is_hfence_vvma || + is_hfence_gvma || + is_hlsv || + is_wfi && (!reg_mstatus.prv(0) || !reg_mstatus.tw && reg_hstatus.vtw) || + is_ret && CSR.mode(addr) === PRV.S.U && (!reg_mstatus.prv(0) || reg_hstatus.vtsr) || + is_sfence && (!reg_mstatus.prv(0) || reg_hstatus.vtvm)) + } + + val cause = + Mux( + insn_call, + Causes.user_ecall.U + Mux(reg_mstatus.prv(0) && reg_mstatus.v, PRV.H.U, reg_mstatus.prv), + Mux[UInt](insn_break, Causes.breakpoint.U, io.cause) + ) + val cause_lsbs = cause(log2Ceil(1 + CSR.busErrorIntCause) - 1, 0) + val causeIsDebugInt = cause(xLen - 1) && cause_lsbs === CSR.debugIntCause.U + val causeIsDebugTrigger = !cause(xLen - 1) && cause_lsbs === CSR.debugTriggerCause.U + val causeIsDebugBreak = + !cause(xLen - 1) && insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)( + reg_mstatus.prv + ) + val trapToDebug = + usingDebug.B && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug) + val debugEntry = p(DebugModuleKey).map(_.debugEntry).getOrElse(BigInt(0x800)) + val debugException = p(DebugModuleKey).map(_.debugException).getOrElse(BigInt(0x808)) + val debugTVec = Mux(reg_debug, Mux(insn_break, debugEntry.U, debugException.U), debugEntry.U) + val delegate = usingSupervisor.B && reg_mstatus.prv <= PRV.S.U && Mux( + cause(xLen - 1), + read_mideleg(cause_lsbs), + read_medeleg(cause_lsbs) + ) + val delegateVS = reg_mstatus.v && delegate && Mux(cause(xLen - 1), read_hideleg(cause_lsbs), read_hedeleg(cause_lsbs)) + def mtvecBaseAlign = 2 + def mtvecInterruptAlign = { + require(reg_mip.getWidth <= xLen) + log2Ceil(xLen) + } + val notDebugTVec = { + val base = Mux(delegate, Mux(delegateVS, read_vstvec, read_stvec), read_mtvec) + val interruptOffset = cause(mtvecInterruptAlign - 1, 0) << mtvecBaseAlign + val interruptVec = 
Cat(base >> (mtvecInterruptAlign + mtvecBaseAlign), interruptOffset) + val doVector = base(0) && cause(cause.getWidth - 1) && (cause_lsbs >> mtvecInterruptAlign) === 0.U + Mux(doVector, interruptVec, base >> mtvecBaseAlign << mtvecBaseAlign) + } + + val causeIsRnmiInt = + cause(xLen - 1) && cause(xLen - 2) && (cause_lsbs === CSR.rnmiIntCause.U || cause_lsbs === CSR.rnmiBEUCause.U) + val causeIsRnmiBEU = cause(xLen - 1) && cause(xLen - 2) && cause_lsbs === CSR.rnmiBEUCause.U + val causeIsNmi = causeIsRnmiInt + val nmiTVecInt = io.interrupts.nmi.map(nmi => nmi.rnmi_interrupt_vector).getOrElse(0.U) + val nmiTVecXcpt = io.interrupts.nmi.map(nmi => nmi.rnmi_exception_vector).getOrElse(0.U) + val trapToNmiInt = usingNMI.B && causeIsNmi + val trapToNmiXcpt = usingNMI.B && !nmie + val trapToNmi = trapToNmiInt || trapToNmiXcpt + val nmiTVec = (Mux(causeIsNmi, nmiTVecInt, nmiTVecXcpt) >> 1) << 1 + + val tvec = Mux(trapToDebug, debugTVec, Mux(trapToNmi, nmiTVec, notDebugTVec)) + io.evec := tvec + io.ptbr := reg_satp + io.hgatp := reg_hgatp + io.vsatp := reg_vsatp + io.eret := insn_call || insn_break || insn_ret + io.singleStep := reg_dcsr.step && !reg_debug + io.status := reg_mstatus + io.status.sd := io.status.fs.andR || io.status.xs.andR || io.status.vs.andR + io.status.debug := reg_debug + io.status.isa := reg_misa + io.status.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U + io.status.sxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U + io.status.dprv := Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpp, reg_mstatus.prv) + io.status.dv := reg_mstatus.v || Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpv, false.B) + io.status.sd_rv32 := (xLen == 32).B && io.status.sd + io.status.mpv := reg_mstatus.mpv + io.status.gva := reg_mstatus.gva + io.status.vs := vector.map(vector => vector.states("mstatus.VS") << 9).getOrElse(0.U(2.W)) + io.hstatus := reg_hstatus + io.hstatus.vsxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U + io.gstatus := reg_vsstatus + 
io.gstatus.sd := io.gstatus.fs.andR || io.gstatus.xs.andR || io.gstatus.vs.andR + io.gstatus.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U + io.gstatus.sd_rv32 := (xLen == 32).B && io.gstatus.sd + + val exception = insn_call || insn_break || io.exception + assert( + PopCount(insn_ret :: insn_call :: insn_break :: io.exception :: Nil) <= 1.U, + "these conditions must be mutually exclusive" + ) + + when(insn_wfi && !io.singleStep && !reg_debug) { reg_wfi := true.B } + when(pending_interrupts.orR || io.interrupts.debug || exception) { reg_wfi := false.B } + io.interrupts.nmi.map(nmi => when(nmi.rnmi) { reg_wfi := false.B }) + + when(io.retire(0) || exception) { reg_singleStepped := true.B } + when(!io.singleStep) { reg_singleStepped := false.B } + assert(!io.singleStep || io.retire <= 1.U) + assert(!reg_singleStepped || io.retire === 0.U) + + val epc = formEPC(io.pc) + val tval = Mux(insn_break, epc, io.tval) + + when(exception) { + when(trapToDebug) { + when(!reg_debug) { + reg_mstatus.v := false.B + reg_debug := true.B + reg_dpc := epc + reg_dcsr.cause := Mux( + reg_singleStepped, + 4.U, + Mux(causeIsDebugInt, 3.U, Mux[UInt](causeIsDebugTrigger, 2.U, 1.U)) + ) + reg_dcsr.prv := trimPrivilege(reg_mstatus.prv) + reg_dcsr.v := reg_mstatus.v + new_prv := PRV.M.U + } + }.elsewhen(trapToNmiInt) { + when(reg_rnmie) { + reg_mstatus.v := false.B + reg_mnstatus.mpv := reg_mstatus.v + reg_rnmie := false.B + reg_mnepc := epc + reg_mncause := (BigInt(1) << (xLen - 1)).U | Mux(causeIsRnmiBEU, 3.U, 2.U) + reg_mnstatus.mpp := trimPrivilege(reg_mstatus.prv) + new_prv := PRV.M.U + } + }.elsewhen(delegateVS && nmie) { + reg_mstatus.v := true.B + reg_vsstatus.spp := reg_mstatus.prv + reg_vsepc := epc + reg_vscause := Mux(cause(xLen - 1), Cat(cause(xLen - 1, 2), 1.U(2.W)), cause) + reg_vstval := tval + reg_vsstatus.spie := reg_vsstatus.sie + reg_vsstatus.sie := false.B + new_prv := PRV.S.U + }.elsewhen(delegate && nmie) { + reg_mstatus.v := false.B + reg_hstatus.spvp := 
Mux(reg_mstatus.v, reg_mstatus.prv(0), reg_hstatus.spvp) + reg_hstatus.gva := io.gva + reg_hstatus.spv := reg_mstatus.v + reg_sepc := epc + reg_scause := cause + reg_stval := tval + reg_htval := io.htval + reg_mstatus.spie := reg_mstatus.sie + reg_mstatus.spp := reg_mstatus.prv + reg_mstatus.sie := false.B + new_prv := PRV.S.U + }.otherwise { + reg_mstatus.v := false.B + reg_mstatus.mpv := reg_mstatus.v + reg_mstatus.gva := io.gva + reg_mepc := epc + reg_mcause := cause + reg_mtval := tval + reg_mtval2 := io.htval + reg_mstatus.mpie := reg_mstatus.mie + reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv) + reg_mstatus.mie := false.B + new_prv := PRV.M.U + } + } + + for (i <- 0 until supported_interrupts.getWidth) { + val en = + exception && (supported_interrupts & (BigInt(1) << i).U) =/= 0.U && cause === (BigInt(1) << (xLen - 1)).U + i.U + val delegable = (delegable_interrupts & (BigInt(1) << i).U) =/= 0.U + property.cover(en && !delegate, s"INTERRUPT_M_$i") + property.cover(en && delegable && delegate, s"INTERRUPT_S_$i") + } + for (i <- 0 until xLen) { + val supported_exceptions: BigInt = 0x8fe | + (if (usingCompressed && !coreParams.misaWritable) 0 else 1) | + (if (usingUser) 0x100 else 0) | + (if (usingSupervisor) 0x200 else 0) | + (if (usingVM) 0xb000 else 0) + if (((supported_exceptions >> i) & 1) != 0) { + val en = exception && cause === i.U + val delegable = (delegable_exceptions & (BigInt(1) << i).U) =/= 0.U + property.cover(en && !delegate, s"EXCEPTION_M_$i") + property.cover(en && delegable && delegate, s"EXCEPTION_S_$i") + } + } + + when(insn_ret) { + val ret_prv = WireInit(UInt(), DontCare) + when(usingSupervisor.B && !io.rw.addr(9)) { + when(!reg_mstatus.v) { + reg_mstatus.sie := reg_mstatus.spie + reg_mstatus.spie := true.B + reg_mstatus.spp := PRV.U.U + ret_prv := reg_mstatus.spp + reg_mstatus.v := usingHypervisor.B && reg_hstatus.spv + io.evec := readEPC(reg_sepc) + reg_hstatus.spv := false.B + }.otherwise { + reg_vsstatus.sie := reg_vsstatus.spie + 
reg_vsstatus.spie := true.B + reg_vsstatus.spp := PRV.U.U + ret_prv := reg_vsstatus.spp + reg_mstatus.v := usingHypervisor.B + io.evec := readEPC(reg_vsepc) + } + }.elsewhen(usingDebug.B && io.rw.addr(10) && io.rw.addr(7)) { + ret_prv := reg_dcsr.prv + reg_mstatus.v := usingHypervisor.B && reg_dcsr.v && reg_dcsr.prv <= PRV.S.U + reg_debug := false.B + io.evec := readEPC(reg_dpc) + }.elsewhen(usingNMI.B && io.rw.addr(10) && !io.rw.addr(7)) { + ret_prv := reg_mnstatus.mpp + reg_mstatus.v := usingHypervisor.B && reg_mnstatus.mpv && reg_mnstatus.mpp <= PRV.S.U + reg_rnmie := true.B + io.evec := readEPC(reg_mnepc) + }.otherwise { + reg_mstatus.mie := reg_mstatus.mpie + reg_mstatus.mpie := true.B + reg_mstatus.mpp := legalizePrivilege(PRV.U.U) + reg_mstatus.mpv := false.B + ret_prv := reg_mstatus.mpp + reg_mstatus.v := usingHypervisor.B && reg_mstatus.mpv && reg_mstatus.mpp <= PRV.S.U + io.evec := readEPC(reg_mepc) + } + + new_prv := ret_prv + when(usingUser.B && ret_prv <= PRV.S.U) { + reg_mstatus.mprv := false.B + } + } + + io.time := reg_cycle + io.csrStall := reg_wfi || io.status.cease + io.status.cease := RegEnable(true.B, false.B, insn_cease) + io.status.wfi := reg_wfi + + for ((io, reg) <- io.customCSRs.zip(reg_custom)) { + io.wen := false.B + io.wdata := wdata + io.value := reg + } + + val setVlReadData: UInt = Wire(UInt(xLen.W)) + io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v).asUInt | setVlReadData + + // cover access to register + val coverable_counters = read_mapping.filterNot { + case (k, _) => + k >= CSR.firstHPC + nPerfCounters && k < CSR.firstHPC + CSR.nHPM + } + coverable_counters.foreach({ + case (k, v) => { + when(!k.U(11, 10).andR) { // Cover points for RW CSR registers + property.cover( + io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr === k.U, + "CSR_access_" + k.toString, + "Cover Accessing Core CSR field" + ) + }.otherwise { // Cover points for RO CSR registers + property.cover( + io.rw.cmd === CSR.R && 
io.rw.addr === k.U, + "CSR_access_" + k.toString, + "Cover Accessing Core CSR field" + ) + } + } + }) + + val set_fs_dirty = WireDefault(io.setFsDirty.getOrElse(false.B)) + if (coreParams.haveFSDirty) { + when(set_fs_dirty) { + assert(reg_mstatus.fs > 0.U) + when(reg_mstatus.v) { reg_vsstatus.fs := 3.U } + reg_mstatus.fs := 3.U + } + } + + io.fcsrRm := reg_frm + when(io.fcsrFlags.valid) { + reg_fflags := reg_fflags | io.fcsrFlags.bits + set_fs_dirty := true.B + } + + val csr_wen = io.rw.cmd.isOneOf(CSR.S, CSR.C, CSR.W) && !io.rwStall + io.csrwCounter := Mux( + coreParams.haveBasicCounters.B && csr_wen && (io.rw.addr.inRange( + CSRs.mcycle.U, + (CSRs.mcycle + CSR.nCtr).U + ) || io.rw.addr.inRange(CSRs.mcycleh.U, (CSRs.mcycleh + CSR.nCtr).U)), + UIntToOH(io.rw.addr(log2Ceil(CSR.nCtr + nPerfCounters) - 1, 0)), + 0.U + ) + when(csr_wen) { + val scause_mask = ((BigInt(1) << (xLen - 1)) + 31).U /* only implement 5 LSBs and MSB */ + val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(new PTBR().pgLevelsToMode(_)) + + when(decoded_addr(CSRs.mstatus)) { + val new_mstatus = wdata.asTypeOf(new MStatus()) + reg_mstatus.mie := new_mstatus.mie + reg_mstatus.mpie := new_mstatus.mpie + + if (usingUser) { + reg_mstatus.mprv := new_mstatus.mprv + reg_mstatus.mpp := legalizePrivilege(new_mstatus.mpp) + if (usingSupervisor) { + reg_mstatus.spp := new_mstatus.spp + reg_mstatus.spie := new_mstatus.spie + reg_mstatus.sie := new_mstatus.sie + reg_mstatus.tw := new_mstatus.tw + reg_mstatus.tsr := new_mstatus.tsr + } + if (usingVM) { + reg_mstatus.mxr := new_mstatus.mxr + reg_mstatus.sum := new_mstatus.sum + reg_mstatus.tvm := new_mstatus.tvm + } + if (usingHypervisor) { + reg_mstatus.mpv := new_mstatus.mpv + reg_mstatus.gva := new_mstatus.gva + } + } + + if (usingSupervisor || usingFPU) reg_mstatus.fs := formFS(new_mstatus.fs) + + vector.map(vector => vector.states("mstatus.VS") := new_mstatus.vs) + } + when(decoded_addr(CSRs.misa)) { + val mask = 
isaStringToMask(isaMaskString).U(xLen.W) + val f = wdata('f' - 'a') + // suppress write if it would cause the next fetch to be misaligned + when(!usingCompressed.B || !io.pc(1) || wdata('c' - 'a')) { + if (coreParams.misaWritable) + reg_misa := ~(~wdata | (!f << ('d' - 'a'))) & mask | reg_misa & ~mask + } + } + when(decoded_addr(CSRs.mip)) { + // MIP should be modified based on the value in reg_mip, not the value + // in read_mip, since read_mip.seip is the OR of reg_mip.seip and + // io.interrupts.seip. We don't want the value on the PLIC line to + // inadvertently be OR'd into read_mip.seip. + val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP) + if (usingSupervisor) { + reg_mip.ssip := new_mip.ssip + reg_mip.stip := new_mip.stip + reg_mip.seip := new_mip.seip + } + if (usingHypervisor) { + reg_mip.vssip := new_mip.vssip + } + } + when(decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } + when(decoded_addr(CSRs.mepc)) { reg_mepc := formEPC(wdata) } + when(decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } + if (mtvecWritable) + when(decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata } + when(decoded_addr(CSRs.mcause)) { + reg_mcause := wdata & ((BigInt(1) << (xLen - 1)) + (BigInt(1) << whichInterrupt.getWidth) - 1).U + } + when(decoded_addr(CSRs.mtval)) { reg_mtval := wdata } + + if (usingNMI) { + val new_mnstatus = wdata.asTypeOf(new MNStatus()) + when(decoded_addr(CustomCSRs.mnscratch)) { reg_mnscratch := wdata } + when(decoded_addr(CustomCSRs.mnepc)) { reg_mnepc := formEPC(wdata) } + when(decoded_addr(CustomCSRs.mncause)) { reg_mncause := wdata & ((BigInt(1) << (xLen - 1)) + BigInt(3)).U } + when(decoded_addr(CustomCSRs.mnstatus)) { + reg_mnstatus.mpp := legalizePrivilege(new_mnstatus.mpp) + reg_mnstatus.mpv := usingHypervisor.B && new_mnstatus.mpv + reg_rnmie := reg_rnmie | new_mnstatus.mie // mnie bit settable but not clearable from software + } + } + + for (((e, c), i) <- 
(reg_hpmevent.zip(reg_hpmcounter)).zipWithIndex) { + writeCounter(i + CSR.firstMHPC, c, wdata) + when(decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) } + } + if (coreParams.haveBasicCounters) { + when(decoded_addr(CSRs.mcountinhibit)) { + reg_mcountinhibit := wdata & ~2.U(xLen.W) + } // mcountinhibit bit [1] is tied zero + writeCounter(CSRs.mcycle, reg_cycle, wdata) + writeCounter(CSRs.minstret, reg_instret, wdata) + } + + if (usingFPU) { + when(decoded_addr(CSRs.fflags)) { set_fs_dirty := true.B; reg_fflags := wdata } + when(decoded_addr(CSRs.frm)) { set_fs_dirty := true.B; reg_frm := wdata } + when(decoded_addr(CSRs.fcsr)) { + set_fs_dirty := true.B + reg_fflags := wdata + reg_frm := wdata >> reg_fflags.getWidth + } + } + if (usingDebug) { + when(decoded_addr(CSRs.dcsr)) { + val new_dcsr = wdata.asTypeOf(new DCSR()) + reg_dcsr.step := new_dcsr.step + reg_dcsr.ebreakm := new_dcsr.ebreakm + if (usingSupervisor) reg_dcsr.ebreaks := new_dcsr.ebreaks + if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku + if (usingUser) reg_dcsr.prv := legalizePrivilege(new_dcsr.prv) + if (usingHypervisor) reg_dcsr.v := new_dcsr.v + } + when(decoded_addr(CSRs.dpc)) { reg_dpc := formEPC(wdata) } + when(decoded_addr(CSRs.dscratch0)) { reg_dscratch0 := wdata } + reg_dscratch1.foreach { r => + when(decoded_addr(CSRs.dscratch1)) { r := wdata } + } + } + if (usingSupervisor) { + when(decoded_addr(CSRs.sstatus)) { + val new_sstatus = wdata.asTypeOf(new MStatus()) + reg_mstatus.sie := new_sstatus.sie + reg_mstatus.spie := new_sstatus.spie + reg_mstatus.spp := new_sstatus.spp + reg_mstatus.fs := formFS(new_sstatus.fs) + if (usingVM) { + reg_mstatus.mxr := new_sstatus.mxr + reg_mstatus.sum := new_sstatus.sum + } + } + when(decoded_addr(CSRs.sip)) { + val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP()) + reg_mip.ssip := new_sip.ssip + } + when(decoded_addr(CSRs.satp)) { + if (usingVM) { + val new_satp = wdata.asTypeOf(new PTBR()) 
+ when(new_satp.mode.isOneOf(satp_valid_modes.map(_.U))) { + reg_satp.mode := new_satp.mode & satp_valid_modes.reduce(_ | _).U + reg_satp.ppn := new_satp.ppn(ppnBits - 1, 0) + if (asIdBits > 0) reg_satp.asid := new_satp.asid(asIdBits - 1, 0) + } + } + } + when(decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~sie_mask) | (wdata & sie_mask) } + when(decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } + when(decoded_addr(CSRs.sepc)) { reg_sepc := formEPC(wdata) } + when(decoded_addr(CSRs.stvec)) { reg_stvec := wdata } + when(decoded_addr(CSRs.scause)) { reg_scause := wdata & scause_mask } + when(decoded_addr(CSRs.stval)) { reg_stval := wdata } + when(decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata } + when(decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata } + when(decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata } + when(decoded_addr(CSRs.senvcfg)) { reg_senvcfg.write(wdata) } + } + + if (usingHypervisor) { + when(decoded_addr(CSRs.hstatus)) { + val new_hstatus = wdata.asTypeOf(new HStatus()) + reg_hstatus.gva := new_hstatus.gva + reg_hstatus.spv := new_hstatus.spv + reg_hstatus.spvp := new_hstatus.spvp + reg_hstatus.hu := new_hstatus.hu + reg_hstatus.vtvm := new_hstatus.vtvm + reg_hstatus.vtw := new_hstatus.vtw + reg_hstatus.vtsr := new_hstatus.vtsr + reg_hstatus.vsxl := new_hstatus.vsxl + } + when(decoded_addr(CSRs.hideleg)) { reg_hideleg := wdata } + when(decoded_addr(CSRs.hedeleg)) { reg_hedeleg := wdata } + when(decoded_addr(CSRs.hgatp)) { + val new_hgatp = wdata.asTypeOf(new PTBR()) + val valid_modes = 0 +: (minPgLevels to pgLevels).map(new_hgatp.pgLevelsToMode(_)) + when(new_hgatp.mode.isOneOf(valid_modes.map(_.U))) { + reg_hgatp.mode := new_hgatp.mode & valid_modes.reduce(_ | _).U + } + reg_hgatp.ppn := Cat(new_hgatp.ppn(ppnBits - 1, 2), 0.U(2.W)) + if (vmIdBits > 0) reg_hgatp.asid := new_hgatp.asid(vmIdBits - 1, 0) + } + when(decoded_addr(CSRs.hip)) { + val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & 
hs_delegable_interrupts)).asTypeOf(new MIP()) + reg_mip.vssip := new_hip.vssip + } + when(decoded_addr(CSRs.hie)) { + reg_mie := (reg_mie & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts) + } + when(decoded_addr(CSRs.hvip)) { + val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP()) + reg_mip.vssip := new_sip.vssip + reg_mip.vstip := new_sip.vstip + reg_mip.vseip := new_sip.vseip + } + when(decoded_addr(CSRs.hcounteren)) { reg_hcounteren := wdata } + when(decoded_addr(CSRs.htval)) { reg_htval := wdata } + when(decoded_addr(CSRs.mtval2)) { reg_mtval2 := wdata } + + when(decoded_addr(CSRs.vsstatus)) { + val new_vsstatus = wdata.asTypeOf(new MStatus()) + reg_vsstatus.sie := new_vsstatus.sie + reg_vsstatus.spie := new_vsstatus.spie + reg_vsstatus.spp := new_vsstatus.spp + reg_vsstatus.mxr := new_vsstatus.mxr + reg_vsstatus.sum := new_vsstatus.sum + reg_vsstatus.fs := formFS(new_vsstatus.fs) + } + when(decoded_addr(CSRs.vsip)) { + val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP()) + reg_mip.vssip := new_vsip.vssip + } + when(decoded_addr(CSRs.vsatp)) { + val new_vsatp = wdata.asTypeOf(new PTBR()) + val mode_ok = new_vsatp.mode.isOneOf(satp_valid_modes.map(_.U)) + when(mode_ok) { + reg_vsatp.mode := new_vsatp.mode & satp_valid_modes.reduce(_ | _).U + } + when(mode_ok || !reg_mstatus.v) { + reg_vsatp.ppn := new_vsatp.ppn(vpnBits.min(new_vsatp.ppn.getWidth) - 1, 0) + if (asIdBits > 0) reg_vsatp.asid := new_vsatp.asid(asIdBits - 1, 0) + } + } + when(decoded_addr(CSRs.vsie)) { reg_mie := (reg_mie & ~read_hideleg) | ((wdata << 1) & read_hideleg) } + when(decoded_addr(CSRs.vsscratch)) { reg_vsscratch := wdata } + when(decoded_addr(CSRs.vsepc)) { reg_vsepc := formEPC(wdata) } + when(decoded_addr(CSRs.vstvec)) { reg_vstvec := wdata } + when(decoded_addr(CSRs.vscause)) { reg_vscause := wdata & scause_mask } + when(decoded_addr(CSRs.vstval)) { reg_vstval := wdata } + 
when(decoded_addr(CSRs.henvcfg)) { reg_henvcfg.write(wdata) } + } + if (usingUser) { + when(decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata } + when(decoded_addr(CSRs.menvcfg)) { reg_menvcfg.write(wdata) } + } + if (nBreakpoints > 0) { + when(decoded_addr(CSRs.tselect)) { reg_tselect := wdata } + + for ((bp, i) <- reg_bp.zipWithIndex) { + when(i.U === reg_tselect && (!bp.control.dmode || reg_debug)) { + when(decoded_addr(CSRs.tdata2)) { bp.address := wdata } + when(decoded_addr(CSRs.tdata3)) { + if (coreParams.mcontextWidth > 0) { + bp.textra.mselect := wdata(bp.textra.mselectPos) + bp.textra.mvalue := wdata >> bp.textra.mvaluePos + } + if (coreParams.scontextWidth > 0) { + bp.textra.sselect := wdata(bp.textra.sselectPos) + bp.textra.svalue := wdata >> bp.textra.svaluePos + } + } + when(decoded_addr(CSRs.tdata1)) { + bp.control := wdata.asTypeOf(bp.control) + + val prevChain = if (i == 0) false.B else reg_bp(i - 1).control.chain + val prevDMode = if (i == 0) false.B else reg_bp(i - 1).control.dmode + val nextChain = if (i >= nBreakpoints - 1) true.B else reg_bp(i + 1).control.chain + val nextDMode = if (i >= nBreakpoints - 1) true.B else reg_bp(i + 1).control.dmode + val newBPC = readModifyWriteCSR(io.rw.cmd, bp.control.asUInt, io.rw.wdata).asTypeOf(bp.control) + val dMode = newBPC.dmode && reg_debug && (prevDMode || !prevChain) + bp.control.dmode := dMode + when(dMode || (newBPC.action > 1.U)) { bp.control.action := newBPC.action }.otherwise { + bp.control.action := 0.U + } + bp.control.chain := newBPC.chain && !(prevChain || nextChain) && (dMode || !nextDMode) + } + } + } + } + reg_mcontext.foreach { r => when(decoded_addr(CSRs.mcontext)) { r := wdata } } + reg_scontext.foreach { r => when(decoded_addr(CSRs.scontext)) { r := wdata } } + if (reg_pmp.nonEmpty) for (((pmp, next), i) <- (reg_pmp.zip(reg_pmp.tail :+ reg_pmp.last)).zipWithIndex) { + require(xLen % pmp.cfg.getWidth == 0) + when(decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !pmp.cfgLocked) { + 
val newCfg = (wdata >> ((i * pmp.cfg.getWidth) % xLen)).asTypeOf(new PMPConfig()) + pmp.cfg := newCfg + // disallow unreadable but writable PMPs + pmp.cfg.w := newCfg.w && newCfg.r + // can't select a=NA4 with coarse-grained PMPs + if (pmpGranularity.log2 > PMP.lgAlign) + pmp.cfg.a := Cat(newCfg.a(1), newCfg.a.orR) + } + when(decoded_addr(CSRs.pmpaddr0 + i) && !pmp.addrLocked(next)) { + pmp.addr := wdata + } + } + def writeCustomCSR(io: CustomCSRIO, csr: CustomCSR, reg: UInt) = { + val mask = csr.mask.U(xLen.W) + when(decoded_addr(csr.id)) { + reg := (wdata & mask) | (reg & ~mask) + io.wen := true.B + } + } + for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) { + writeCustomCSR(io, csr, reg) + } + + } + + // update csr for vector + if (usingVector) { + // set vl type + val vsetvli = !io.inst(0)(31) + val vsetivli = io.inst(0)(31, 30).andR + val vsetvl = io.inst(0)(31) && !io.inst(0)(30) + val rs1IsZero = io.inst(0)(19, 15) === 0.U + val rdIsZero = io.inst(0)(11, 7) === 0.U + // v type set + val newVType = Mux1H(Seq( + (vsetvli || vsetivli) -> io.inst(0)(27, 20), + vsetvl -> io.wbRegRS2.get(7, 0) + )) + // vlmax = vlen * lmul / sew + val vlmax: UInt = (true.B << (log2Ceil(vLen) - 6) << (newVType(2, 0) + 3.U) >> newVType(5, 3)).asUInt + // set vl + val setVL = Mux1H(Seq( + ((vsetvli || vsetvl) && !rs1IsZero) -> Mux(io.rw.wdata > vlmax, vlmax, io.rw.wdata), + ((vsetvli || vsetvl) && rs1IsZero && !rdIsZero) -> vlmax, + ((vsetvli || vsetvl) && rs1IsZero && rdIsZero) -> vector.get.states("vl"), + vsetivli -> io.inst(0)(19, 15) + )) + setVlReadData := Mux(io.retire(0) && io.vectorCsr.getOrElse(false.B), setVL, 0.U) + when(io.retire(0) && io.vectorCsr.get) { + vector.get.states("vl") := setVL + vector.get.states("vlmul") := newVType(2, 0) + vector.get.states("vsew") := newVType(5, 3) + vector.get.states("vta") := newVType(6) + vector.get.states("vma") := newVType(7) + } + } else { + setVlReadData := 0.U + } + def setCustomCSR(io: CustomCSRIO, csr: 
CustomCSR, reg: UInt) = { + val mask = csr.mask.U(xLen.W) + when(io.set) { + reg := (io.sdata & mask) | (reg & ~mask) + } + } + for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) { + setCustomCSR(io, csr, reg) + } + + when(reset.asBool) { + reg_satp.mode := 0.U + reg_vsatp.mode := 0.U + reg_hgatp.mode := 0.U + } + if (!usingVM) { + reg_satp.mode := 0.U + reg_satp.ppn := 0.U + reg_satp.asid := 0.U + } + if (!usingHypervisor) { + reg_vsatp.mode := 0.U + reg_vsatp.ppn := 0.U + reg_vsatp.asid := 0.U + reg_hgatp.mode := 0.U + reg_hgatp.ppn := 0.U + reg_hgatp.asid := 0.U + } + if (!(asIdBits > 0)) { + reg_satp.asid := 0.U + reg_vsatp.asid := 0.U + } + if (!(vmIdBits > 0)) { + reg_hgatp.asid := 0.U + } + reg_vsstatus.xs := 0.U + + if (nBreakpoints <= 1) reg_tselect := 0.U + for (bpc <- reg_bp.map { _.control }) { + bpc.ttype := bpc.tType.U + bpc.maskmax := bpc.maskMax.U + bpc.reserved := 0.U + bpc.zero := 0.U + bpc.h := false.B + if (!usingSupervisor) bpc.s := false.B + if (!usingUser) bpc.u := false.B + if (!usingSupervisor && !usingUser) bpc.m := true.B + when(reset.asBool) { + bpc.action := 0.U + bpc.dmode := false.B + bpc.chain := false.B + bpc.r := false.B + bpc.w := false.B + bpc.x := false.B + } + } + for (bpx <- reg_bp.map { _.textra }) { + if (coreParams.mcontextWidth == 0) bpx.mselect := false.B + if (coreParams.scontextWidth == 0) bpx.sselect := false.B + } + for (bp <- reg_bp.drop(nBreakpoints)) + bp := 0.U.asTypeOf(new BP()) + for (pmp <- reg_pmp) { + pmp.cfg.res := 0.U + when(reset.asBool) { pmp.reset() } + } + + def chooseInterrupt(masksIn: Seq[UInt]): (Bool, UInt) = { + val nonstandard = supported_interrupts.getWidth - 1 to 12 by -1 + // MEI, MSI, MTI, SEI, SSI, STI, VSEI, VSSI, VSTI, UEI, USI, UTI + val standard = Seq(11, 3, 7, 9, 1, 5, 10, 2, 6, 8, 0, 4) + val priority = nonstandard ++ standard + val masks = masksIn.reverse + val any = masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => m(i))).reduce(_ || _) + val which = 
PriorityMux(masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => (m(i), i.U)))) + (any, which) + } + + def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) = { + (Mux(cmd(1), rdata, 0.U) | wdata) & ~Mux(cmd(1, 0).andR, wdata, 0.U) + } + + def legalizePrivilege(priv: UInt): UInt = + if (usingSupervisor) Mux(priv === PRV.H.U, PRV.U.U, priv) + else if (usingUser) Fill(2, priv(0)) + else PRV.M.U + + def trimPrivilege(priv: UInt): UInt = + if (usingSupervisor) priv + else legalizePrivilege(priv) + + def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = { + if (xLen == 32) { + val hi = lo + CSRs.mcycleh - CSRs.mcycle + when(decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth - 1, 32), wdata) } + when(decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth - 33, 0), ctr(31, 0)) } + } else { + when(decoded_addr(lo)) { ctr := wdata(ctr.getWidth - 1, 0) } + } + } + def formEPC(x: UInt) = ~(~x | (if (usingCompressed) 1.U else 3.U)) + def readEPC(x: UInt) = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U)) + def formTVec(x: UInt) = x.andNot(Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U)) + def isaStringToMask(s: String) = s.map(x => 1 << (x - 'A')).foldLeft(0)(_ | _) + def formFS(fs: UInt) = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR) +} From af69b080c027176d699570b3934d256c964e7c11 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 28 Jun 2024 16:36:13 +0800 Subject: [PATCH 038/140] [rocketv] migrate CSR --- rocketv/src/Bundle.scala | 310 +++++++++++ rocketv/src/CSR.scala | 978 +++++++++++++++++++++------------- rocketv/src/DecodeLogic.scala | 59 ++ rocketv/src/RVDecoderDB.scala | 949 +++++++++++++++++++++++++++++++++ rocketv/src/csr/V.scala | 108 ++++ 5 files changed, 2023 insertions(+), 381 deletions(-) create mode 100644 rocketv/src/DecodeLogic.scala create mode 100644 rocketv/src/RVDecoderDB.scala create mode 100644 rocketv/src/csr/V.scala diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 
1428b5982..33f128b93 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -219,3 +219,313 @@ object CFIType { def call = 2.U def ret = 3.U } + +class CustomCSRIO(xLen: Int) extends Bundle { + val ren = Output(Bool()) // set by CSRFile, indicates an instruction is reading the CSR + val wen = Output(Bool()) // set by CSRFile, indicates an instruction is writing the CSR + val wdata = Output(UInt(xLen.W)) // wdata provided by instruction writing CSR + val value = Output(UInt(xLen.W)) // current value of CSR in CSRFile + + val stall = Input(Bool()) // reads and writes to this CSR should stall (must be bounded) + + val set = Input(Bool()) // set/sdata enables external agents to set the value of this CSR + val sdata = Input(UInt(xLen.W)) +} + +class CustomCSRs(xLen: Int) extends Bundle { + val csrs = Vec(decls.size, new CustomCSRIO(xLen)) + + // Not all cores have these CSRs, but those that do should follow the same + // numbering conventions. So we list them here but default them to None. 
+ protected def bpmCSRId = 0x7c0 + protected def bpmCSR: Option[CustomCSR] = None + protected def chickenCSRId = 0x7c1 + protected def chickenCSR: Option[CustomCSR] = None + // If you override this, you'll want to concatenate super.decls + def decls: Seq[CustomCSR] = bpmCSR.toSeq ++ chickenCSR + def flushBTB = getOrElse(bpmCSR, _.wen, false.B) + def bpmStatic = getOrElse(bpmCSR, _.value(0), false.B) + def disableDCacheClockGate = getOrElse(chickenCSR, _.value(0), false.B) + def disableICacheClockGate = getOrElse(chickenCSR, _.value(1), false.B) + def disableCoreClockGate = getOrElse(chickenCSR, _.value(2), false.B) + def disableSpeculativeICacheRefill = getOrElse(chickenCSR, _.value(3), false.B) + def suppressCorruptOnGrantData = getOrElse(chickenCSR, _.value(9), false.B) + protected def getByIdOrElse[T](id: Int, f: CustomCSRIO => T, alt: T): T = { + val idx = decls.indexWhere(_.id == id) + if (idx < 0) alt else f(csrs(idx)) + } + + protected def getOrElse[T](csr: Option[CustomCSR], f: CustomCSRIO => T, alt: T): T = + csr.map(c => getByIdOrElse(c.id, f, alt)).getOrElse(alt) +} + +class TileInterrupts(usingSupervisor: Boolean, nLocalInterrupts: Int, usingNMI: Boolean, resetVectorLen: Int) extends Bundle { + val debug: Bool = Bool() + val mtip: Bool = Bool() + val msip: Bool = Bool() + val meip: Bool = Bool() + val seip: Option[Bool] = Option.when(usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(nLocalInterrupts, Bool()) + val nmi = Option.when(usingNMI)(new NMI(resetVectorLen)) +} + +class NMI(w: Int) extends Bundle { + val rnmi = Bool() + val rnmi_interrupt_vector = UInt(w.W) + val rnmi_exception_vector = UInt(w.W) +} + +class CoreInterrupts(usingSupervisor: Boolean, nLocalInterrupts: Int, hasBeu: Boolean, usingNMI: Boolean, resetVectorLen: Int) extends Bundle { + val tileInterrupts = new TileInterrupts(usingSupervisor, nLocalInterrupts, usingNMI, resetVectorLen) + val buserror = Option.when(hasBeu)(Bool()) +} + +class HStatus extends Bundle { + val zero6 = 
UInt(30.W) + val vsxl = UInt(2.W) + val zero5 = UInt(9.W) + val vtsr = Bool() + val vtw = Bool() + val vtvm = Bool() + val zero3 = UInt(2.W) + val vgein = UInt(6.W) + val zero2 = UInt(2.W) + val hu = Bool() + val spvp = Bool() + val spv = Bool() + val gva = Bool() + val vsbe = Bool() + val zero1 = UInt(5.W) +} + +class CSRDecodeIO(iLen: Int) extends Bundle { + val inst = Input(UInt(iLen.W)) + val fpIllegal = Output(Bool()) + val fpCsr = Output(Bool()) + val readIllegal = Output(Bool()) + val writeIllegal = Output(Bool()) + val writeFlush = Output(Bool()) + val systemIllegal = Output(Bool()) + val virtualAccessIllegal = Output(Bool()) + val virtualSystemIllegal = Output(Bool()) +} + +object PTBR { + def additionalPgLevels(ptbr: PTBR, pgLevels: Int, minPgLevels: Int) = ptbr.mode(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0) + def modeBits(xLen: Int) = xLen match { + case 32 => 1 + case 64 => 4 + } + def maxASIdBits(xLen: Int) = xLen match { + case 32 => 9 + case 64 => 16 + } +} + +class PTBR(xLen: Int, maxPAddrBits: Int, pgIdxBits: Int) extends Bundle { + val mode: UInt = UInt(PTBR.modeBits(xLen).W) + val asid = UInt(PTBR.maxASIdBits(xLen).W) + val ppn = UInt((maxPAddrBits - pgIdxBits).W) +} + +// TODO: remove me. 
+object FPConstants { + val RM_SZ = 3 + val FLAGS_SZ = 5 +} + + +object PMP { + def lgAlign = 2 + private def UIntToOH1(x: UInt, width: Int): UInt = ~((-1).S(width.W).asUInt << x)(width - 1, 0) + + // For PMPReg + def reset(pmp: PMP): Unit = { + pmp.cfg.a := 0.U + pmp.cfg.l := 0.U + } + def readAddr(pmp: PMP, pmpGranularity: Int) = + if (log2Ceil(pmpGranularity) == PMP.lgAlign) + pmp.addr + else { + val mask = ((BigInt(1) << (log2Ceil(pmpGranularity) - PMP.lgAlign)) - 1).U + Mux(napot(pmp), pmp.addr | (mask >> 1), ~(~pmp.addr | mask)) + } + def napot(pmp: PMP) = pmp.cfg.a(1) + def napot(pmp: PMPReg) = pmp.cfg.a(1) + def torNotNAPOT(pmp: PMP) = pmp.cfg.a(0) + def tor(pmp: PMP) = !napot(pmp) && torNotNAPOT(pmp) + def cfgLocked(pmp: PMP) = pmp.cfg.l + def addrLocked(pmp: PMP, next: PMP) = cfgLocked(pmp) || cfgLocked(next) && tor(next) + // PMP + def computeMask(pmp: PMP, pmpGranularity: Int): UInt = { + val base = Cat(pmp.addr, pmp.cfg.a(0)) | ((pmpGranularity - 1).U >> lgAlign) + Cat(base & ~(base + 1.U), ((1 << lgAlign) - 1).U) + } + private def comparand(pmp: PMP, pmpGranularity: Int): UInt = ~(~(pmp.addr << lgAlign) | (pmpGranularity - 1).U) + + private def pow2Match(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = { + def eval(a: UInt, b: UInt, m: UInt) = ((a ^ b) & ~m) === 0.U + if (lgMaxSize <= log2Ceil(pmpGranularity)) { + eval(x, comparand(pmp, pmpGranularity), pmp.mask) + } else { + // break up the circuit; the MSB part will be CSE'd + val lsbMask = pmp.mask | UIntToOH1(lgSize, lgMaxSize) + val msbMatch: Bool = eval(x >> lgMaxSize, comparand(pmp, pmpGranularity) >> lgMaxSize, pmp.mask >> lgMaxSize) + val lsbMatch: Bool = eval(x(lgMaxSize - 1, 0), comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0), lsbMask(lgMaxSize - 1, 0)) + msbMatch && lsbMatch + } + } + + private def boundMatch(pmp: PMP, x: UInt, lsbMask: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = { + if (lgMaxSize <= log2Ceil(pmpGranularity)) { + x < comparand(pmp, 
pmpGranularity) + } else { + // break up the circuit; the MSB part will be CSE'd + val msbsLess: Bool = (x >> lgMaxSize) < (comparand(pmp, pmpGranularity) >> lgMaxSize) + val msbsEqual: Bool = ((x >> lgMaxSize) ^ (comparand(pmp, pmpGranularity) >> lgMaxSize)) === 0.U + val lsbsLess: Bool = (x(lgMaxSize - 1, 0) | lsbMask) < comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0) + msbsLess || (msbsEqual && lsbsLess) + } + } + + private def lowerBoundMatch(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = + !boundMatch(pmp: PMP, x, UIntToOH1(lgSize, lgMaxSize), lgMaxSize, pmpGranularity: Int) + + private def upperBoundMatch(pmp: PMP, x: UInt, lgMaxSize: Int, pmpGranularity: Int): Bool = + boundMatch(pmp, x, 0.U, lgMaxSize, pmpGranularity) + + private def rangeMatch(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int) = + lowerBoundMatch(prev, x, lgSize, lgMaxSize, pmpGranularity) && upperBoundMatch(pmp, x, lgMaxSize, pmpGranularity) + + private def pow2Homogeneous(pmp: PMP, x: UInt, pgLevel: UInt, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int): Bool = { + val maskHomogeneous = VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => if (idxBits > paddrBits) false.B else pmp.mask(idxBits - 1) })(pgLevel) + maskHomogeneous || VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => ((x ^ comparand(pmp, pmpGranularity)) >> idxBits) =/= 0.U })(pgLevel) + } + + private def pgLevelMap[T](pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int)(f: Int => T): Seq[T] = (0 until pgLevels).map { i => + f(pgIdxBits + (pgLevels - 1 - i) * pgLevelBits) + } + + private def rangeHomogeneous(pmp: PMP, x: UInt, pgLevel: UInt, prev: PMP, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int) = { + val beginsAfterLower = !(x < comparand(prev, pmpGranularity)) + val beginsAfterUpper = !(x < comparand(pmp, pmpGranularity)) + + val pgMask = 
VecInit(pgLevelMap(pgLevels, pgIdxBits, pgLevelBits) { idxBits => (((BigInt(1) << paddrBits) - (BigInt(1) << idxBits)).max(0)).U })(pgLevel) + val endsBeforeLower = (x & pgMask) < (comparand(prev, pmpGranularity) & pgMask) + val endsBeforeUpper = (x & pgMask) < (comparand(pmp, pmpGranularity) & pgMask) + + endsBeforeLower || beginsAfterUpper || (beginsAfterLower && endsBeforeUpper) + } + + // returns whether this PMP completely contains, or contains none of, a page + def homogeneous(pmp: PMP, x: UInt, pgLevel: UInt, prev: PMP, paddrBits: Int, pmpGranularity: Int, pgLevels: Int, pgIdxBits: Int, pgLevelBits: Int): Bool = + Mux(napot(pmp), pow2Homogeneous(pmp, x, pgLevel, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits), !torNotNAPOT(pmp) || rangeHomogeneous(pmp, x, pgLevel, prev, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits)) + + // returns whether this matching PMP fully contains the access + def aligned(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int): Bool = if (lgMaxSize <= log2Ceil(pmpGranularity)) true.B + else { + val lsbMask = UIntToOH1(lgSize, lgMaxSize) + val straddlesLowerBound: Bool = + ((x >> lgMaxSize) ^ (comparand(prev, pmpGranularity) >> lgMaxSize)) === 0.U && + (comparand(prev, pmpGranularity)(lgMaxSize - 1, 0) & ~x(lgMaxSize - 1, 0)) =/= 0.U + val straddlesUpperBound: Bool = + ((x >> lgMaxSize) ^ (comparand(pmp, pmpGranularity) >> lgMaxSize)) === 0.U && + (comparand(pmp, pmpGranularity)(lgMaxSize - 1, 0) & (x(lgMaxSize - 1, 0) | lsbMask)) =/= 0.U + val rangeAligned = !(straddlesLowerBound || straddlesUpperBound) + val pow2Aligned = (lsbMask & ~pmp.mask(lgMaxSize - 1, 0)) === 0.U + Mux(napot(pmp), pow2Aligned, rangeAligned) + } + + // returns whether this PMP matches at least one byte of the access + def hit(pmp: PMP, x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP, pmpGranularity: Int): Bool = + Mux(napot(pmp), pow2Match(pmp, x, lgSize, lgMaxSize, pmpGranularity), torNotNAPOT(pmp) && 
rangeMatch(pmp, x, lgSize, lgMaxSize, prev, pmpGranularity)) + +} + +class PMP(paddrBits: Int) extends Bundle { + val mask = UInt(paddrBits.W) + val cfg = new PMPConfig + val addr = UInt((paddrBits - PMP.lgAlign).W) +} + +class PMPConfig extends Bundle { + val l = Bool() + val res = UInt(2.W) + val a = UInt(2.W) + val x = Bool() + val w = Bool() + val r = Bool() +} + +class PerfCounterIO(xLen: Int, retireWidth: Int) extends Bundle { + val eventSel = Output(UInt(xLen.W)) + val inc = Input(UInt(log2Ceil(1 + retireWidth).W)) +} + +class Envcfg extends Bundle { + val stce = Bool() // only for menvcfg/henvcfg + val pbmte = Bool() // only for menvcfg/henvcfg + val zero54 = UInt(54.W) + val cbze = Bool() + val cbcfe = Bool() + val cbie = UInt(2.W) + val zero3 = UInt(3.W) + val fiom = Bool() +} + +class DCSR extends Bundle { + val xdebugver = UInt(2.W) + val zero4 = UInt(2.W) + val zero3 = UInt(12.W) + val ebreakm = Bool() + val ebreakh = Bool() + val ebreaks = Bool() + val ebreaku = Bool() + val zero2 = Bool() + val stopcycle = Bool() + val stoptime = Bool() + val cause = UInt(3.W) + val v = Bool() + val zero1 = UInt(2.W) + val step = Bool() + val prv = UInt(PRV.SZ.W) +} + +class MIP(nLocalInterrupts: Int) extends Bundle { + val lip = Vec(nLocalInterrupts, Bool()) + val zero1 = Bool() + val debug = Bool() // keep in sync with CSR.debugIntCause + val sgeip = Bool() + val meip = Bool() + val vseip = Bool() + val seip = Bool() + val ueip = Bool() + val mtip = Bool() + val vstip = Bool() + val stip = Bool() + val utip = Bool() + val msip = Bool() + val vssip = Bool() + val ssip = Bool() + val usip = Bool() +} + +object PMPReg { + def napot(pmp: PMPReg) = pmp.cfg.a(1) +} + +class PMPReg(paddrBits: Int) extends Bundle { + val cfg = new PMPConfig + val addr = UInt((paddrBits - PMP.lgAlign).W) +} + +class MNStatus extends Bundle { + val mpp = UInt(2.W) + val zero3 = UInt(3.W) + val mpv = Bool() + val zero2 = UInt(3.W) + val mie = Bool() + val zero1 = UInt(3.W) +} + diff --git 
a/rocketv/src/CSR.scala b/rocketv/src/CSR.scala index 780968357..266a339a5 100644 --- a/rocketv/src/CSR.scala +++ b/rocketv/src/CSR.scala @@ -1,170 +1,170 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. - -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{log2Ceil, log2Up, BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid} -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.devices.debug.DebugModuleKey -import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property - -import scala.collection.mutable.LinkedHashMap -// TODO: remove these -import freechips.rocketchip.rocket.{CSRs, Causes, CustomCSRs, DecodeLogic} -import freechips.rocketchip.rocket.Instructions._ -import freechips.rocketchip.rocket.CustomInstructions._ - -class MStatus extends Bundle { - // not truly part of mstatus, but convenient - val debug = Bool() - val cease = Bool() - val wfi = Bool() - val isa = UInt(32.W) - - val dprv = UInt(PRV.SZ.W) // effective prv for data accesses - val dv = Bool() // effective v for data accesses - val prv = UInt(PRV.SZ.W) - val v = Bool() - - val sd = Bool() - val zero2 = UInt(23.W) - val mpv = Bool() - val gva = Bool() - val mbe = Bool() - val sbe = Bool() - val sxl = UInt(2.W) - val uxl = UInt(2.W) - val sd_rv32 = Bool() - val zero1 = UInt(8.W) - val tsr = Bool() - val tw = Bool() - val tvm = Bool() - val mxr = Bool() - val sum = Bool() - val mprv = Bool() - val xs = UInt(2.W) - val fs = UInt(2.W) - val mpp = UInt(2.W) - val vs = UInt(2.W) - val spp = UInt(1.W) - val mpie = Bool() - val ube = Bool() - val spie = Bool() - val upie = Bool() - val 
mie = Bool() - val hie = Bool() - val sie = Bool() - val uie = Bool() +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ +// @todo: remove me +import org.chipsalliance.rocketv.rvdecoderdbcompat._ + +import scala.collection.mutable + +class EventSet(val gate: (UInt, UInt) => Bool, val events: Seq[(String, () => Bool)]) { + def size = events.size + val hits = WireDefault(VecInit(Seq.fill(size)(false.B))) + def check(mask: UInt) = { + hits := events.map(_._2()) + gate(mask, hits.asUInt) + } + def dump(): Unit = { + for (((name, _), i) <- events.zipWithIndex) + when(check(1.U << i)) { printf(s"Event $name\n") } + } } -class MNStatus extends Bundle { - val mpp = UInt(2.W) - val zero3 = UInt(3.W) - val mpv = Bool() - val zero2 = UInt(3.W) - val mie = Bool() - val zero1 = UInt(3.W) -} +class EventSets(val eventSets: Seq[EventSet]) { + def maskEventSelector(eventSel: UInt): UInt = { + // allow full associativity between counters and event sets (for now?) 
+ val setMask = (BigInt(1) << eventSetIdBits) - 1 + val maskMask = ((BigInt(1) << eventSets.map(_.size).max) - 1) << maxEventSetIdBits + eventSel & (setMask | maskMask).U + } -class HStatus extends Bundle { - val zero6 = UInt(30.W) - val vsxl = UInt(2.W) - val zero5 = UInt(9.W) - val vtsr = Bool() - val vtw = Bool() - val vtvm = Bool() - val zero3 = UInt(2.W) - val vgein = UInt(6.W) - val zero2 = UInt(2.W) - val hu = Bool() - val spvp = Bool() - val spv = Bool() - val gva = Bool() - val vsbe = Bool() - val zero1 = UInt(5.W) -} + private def decode(counter: UInt): (UInt, UInt) = { + require(eventSets.size <= (1 << maxEventSetIdBits)) + require(eventSetIdBits > 0) + (counter(eventSetIdBits - 1, 0), counter >> maxEventSetIdBits) + } -class DCSR extends Bundle { - val xdebugver = UInt(2.W) - val zero4 = UInt(2.W) - val zero3 = UInt(12.W) - val ebreakm = Bool() - val ebreakh = Bool() - val ebreaks = Bool() - val ebreaku = Bool() - val zero2 = Bool() - val stopcycle = Bool() - val stoptime = Bool() - val cause = UInt(3.W) - val v = Bool() - val zero1 = UInt(2.W) - val step = Bool() - val prv = UInt(PRV.SZ.W) -} + def evaluate(eventSel: UInt): Bool = { + val (set, mask) = decode(eventSel) + val sets = for (e <- eventSets) yield { + require(e.hits.getWidth <= mask.getWidth, s"too many events ${e.hits.getWidth} wider than mask ${mask.getWidth}") + e.check(mask) + } + VecInit(sets).asUInt(set) + } -class MIP(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { - val lip = Vec(coreParams.nLocalInterrupts, Bool()) - val zero1 = Bool() - val debug = Bool() // keep in sync with CSR.debugIntCause - val sgeip = Bool() - val meip = Bool() - val vseip = Bool() - val seip = Bool() - val ueip = Bool() - val mtip = Bool() - val vstip = Bool() - val stip = Bool() - val utip = Bool() - val msip = Bool() - val vssip = Bool() - val ssip = Bool() - val usip = Bool() -} +// def cover() = eventSets.foreach { _.withCovers } -class Envcfg extends Bundle { - val stce = Bool() 
// only for menvcfg/henvcfg - val pbmte = Bool() // only for menvcfg/henvcfg - val zero54 = UInt(54.W) - val cbze = Bool() - val cbcfe = Bool() - val cbie = UInt(2.W) - val zero3 = UInt(3.W) - val fiom = Bool() - def write(wdata: UInt) { - val new_envcfg = wdata.asTypeOf(new Envcfg) - fiom := new_envcfg.fiom // only FIOM is writable currently - } + private def eventSetIdBits = log2Up(eventSets.size) + private def maxEventSetIdBits = 8 + + require(eventSetIdBits <= maxEventSetIdBits) } -class PTBR(implicit p: Parameters) extends CoreBundle()(p) { - def additionalPgLevels = mode.extract(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0) - def pgLevelsToMode(i: Int) = (xLen, i) match { - case (32, 2) => 1 - case (64, x) if x >= 3 && x <= 6 => x + 5 - } - val (modeBits, maxASIdBits) = xLen match { - case 32 => (1, 9) - case 64 => (4, 16) - } - require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) +case class CustomCSR(id: Int, mask: BigInt, init: Option[BigInt]) - val mode = UInt(modeBits.W) - val asid = UInt(maxASIdBits.W) - val ppn = UInt((maxPAddrBits - pgIdxBits).W) +object CustomCSR { + def constant(id: Int, value: BigInt): CustomCSR = CustomCSR(id, BigInt(0), Some(value)) } -object PRV { - val SZ = 2 - val U = 0 - val S = 1 - val H = 2 - val M = 3 +object CSRParameter { + implicit def rwP: upickle.default.ReadWriter[CSRParameter] = upickle.default.macroRW[CSRParameter] } -object CSR { +case class CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen: Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean) + extends SerializableModuleParameter { + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 
3 + } + + // compatibility mode + // TODO: Below is parameter that not configurable + def decodeWidth = 1 + def nLocalInterrupts: Int = 0 + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + def customCSRSize: Int = 0 + def haveBasicCounters: Boolean = true + def resetVectorLen: Int = { + val externalLen = paddrBits + require(externalLen <= xLen, s"External reset vector length ($externalLen) must be <= XLEN ($xLen)") + require( + externalLen <= vaddrBitsExtended, + s"External reset vector length ($externalLen) must be <= virtual address bit width ($vaddrBitsExtended)" + ) + externalLen + } + def iLen: Int = 32 + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + def hasBeu = false + def usingHypervisor = false + def usingNMI = false + def haveCFlush = false + def retireWidth: Int = 1 + private def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def haveFSDirty: Boolean = false + def useBPWatch: Boolean = false + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = { + if (usingHypervisor) maxHypervisorExtraAddrBits + else 0 + } + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1).min(xLen) + } + def vpnBits: Int = vaddrBits - pgIdxBits + def ppnBits: Int = paddrBits - pgIdxBits + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 + def mtvecInit: Option[BigInt] = Some(0) + def misaWritable: Boolean = false + def 
mtvecWritable: Boolean = true + def customIsaExt: Option[String] = None + def useRVE: Boolean = false + def debugEntry: Option[BigInt] = Some(0 + 0x800) + def debugException: Option[BigInt] = Some(0 + 0x808) + // TODO: use layer for DV + def enableCommitLog: Boolean = false + + // original CSR object // commands val SZ = 3 def X = BitPat.dontCare(SZ) @@ -178,10 +178,11 @@ object CSR { // mask a CSR cmd with a valid bit def maskCmd(valid: Bool, cmd: UInt): UInt = { // all commands less than CSR.I are treated by CSRFile as NOPs - cmd & ~Mux(valid, 0.U, CSR.I) + cmd & ~Mux(valid, 0.U, I) } val ADDRSZ = 12 + def pgIdxBits: Int = 12 def modeLSB: Int = 8 def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) @@ -208,96 +209,255 @@ object CSR { val nCtr = 32 val nHPM = nCtr - firstHPM val hpmWidth = 40 - val maxPMPs = 16 } -class PerfCounterIO(implicit p: Parameters) extends CoreBundle with HasCoreParameters { - val eventSel = Output(UInt(xLen.W)) - val inc = Input(UInt(log2Ceil(1 + retireWidth).W)) -} - -// CSR Interface with decode stage, basically check illegal -class CSRDecodeIO(implicit p: Parameters) extends CoreBundle { - val inst = Input(UInt(iLen.W)) - def csrAddr = (inst >> 20)(CSR.ADDRSZ - 1, 0) - val fpIllegal = Output(Bool()) - val fpCsr = Output(Bool()) - val readIllegal = Output(Bool()) - val writeIllegal = Output(Bool()) - val writeFlush = Output(Bool()) - val systemIllegal = Output(Bool()) - val virtualAccessIllegal = Output(Bool()) - val virtualSystemIllegal = Output(Bool()) -} - -class CSRFileIO(hasBeu: Boolean)(implicit p: Parameters) extends CoreBundle with HasCoreParameters { +class CSRInterface(parameter: CSRParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) val ungatedClock = Input(Clock()) - val interrupts = Input(new CoreInterrupts(hasBeu)) - val hartid = Input(UInt(hartIdLen.W)) + val interrupts = Input( + new CoreInterrupts( + parameter.usingSupervisor, + 
parameter.nLocalInterrupts, + parameter.hasBeu, + parameter.usingNMI, + parameter.resetVectorLen + ) + ) + val hartid = Input(UInt(parameter.hartIdLen.W)) val rw = new Bundle { - val addr = Input(UInt(CSR.ADDRSZ.W)) - val cmd = Input(Bits(CSR.SZ.W)) - val rdata = Output(Bits(xLen.W)) - val wdata = Input(Bits(xLen.W)) + val addr = Input(UInt(parameter.ADDRSZ.W)) + val cmd = Input(Bits(parameter.SZ.W)) + val rdata = Output(Bits(parameter.xLen.W)) + val wdata = Input(Bits(parameter.xLen.W)) } - - val decode = Vec(decodeWidth, new CSRDecodeIO) - + val decode = Vec(parameter.decodeWidth, new CSRDecodeIO(parameter.iLen)) val csrStall = Output(Bool()) // stall retire for wfi val rwStall = Output(Bool()) // stall rw, rw will have no effect while rw_stall val eret = Output(Bool()) val singleStep = Output(Bool()) - - val status = Output(new MStatus()) - val hstatus = Output(new HStatus()) - val gstatus = Output(new MStatus()) - val ptbr = Output(new PTBR()) - val hgatp = Output(new PTBR()) - val vsatp = Output(new PTBR()) - val evec = Output(UInt(vaddrBitsExtended.W)) + val status = Output(new MStatus) + val hstatus = Output(new HStatus) + val gstatus = Output(new MStatus) + val ptbr = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val hgatp = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val vsatp = Output(new PTBR(parameter.xLen, parameter.maxPAddrBits, parameter.pgIdxBits)) + val evec = Output(UInt(parameter.vaddrBitsExtended.W)) val exception = Input(Bool()) - val retire = Input(UInt(log2Up(1 + retireWidth).W)) - val cause = Input(UInt(xLen.W)) - val pc = Input(UInt(vaddrBitsExtended.W)) - val tval = Input(UInt(vaddrBitsExtended.W)) - val htval = Input(UInt(((maxSVAddrBits + 1).min(xLen)).W)) + val retire = Input(UInt(log2Up(1 + parameter.retireWidth).W)) + val cause = Input(UInt(parameter.xLen.W)) + val pc = Input(UInt(parameter.vaddrBitsExtended.W)) + val tval = 
Input(UInt(parameter.vaddrBitsExtended.W)) + val htval = Input(UInt(((parameter.maxSVAddrBits + 1).min(parameter.xLen)).W)) val gva = Input(Bool()) - val time = Output(UInt(xLen.W)) + val time = Output(UInt(parameter.xLen.W)) val fcsrRm = Output(Bits(FPConstants.RM_SZ.W)) val fcsrFlags = Flipped(Valid(Bits(FPConstants.FLAGS_SZ.W))) - val setFsDirty = coreParams.haveFSDirty.option(Input(Bool())) + val setFsDirty = Option.when(parameter.haveFSDirty)(Input(Bool())) val interrupt = Output(Bool()) - val interruptCause = Output(UInt(xLen.W)) - val bp = Output(Vec(nBreakpoints, new BP)) - val pmp = Output(Vec(nPMPs, new PMP)) - val counters = Vec(nPerfCounters, new PerfCounterIO) - val csrwCounter = Output(UInt(CSR.nCtr.W)) + val interruptCause = Output(UInt(parameter.xLen.W)) + val bp = Output( + Vec( + parameter.nBreakpoints, + new BP( + parameter.xLen, + parameter.useBPWatch, + parameter.vaddrBits, + parameter.mcontextWidth, + parameter.scontextWidth + ) + ) + ) + val pmp = Output(Vec(parameter.nPMPs, new PMP(parameter.paddrBits))) + val counters = Vec(parameter.nPerfCounters, new PerfCounterIO(parameter.xLen, parameter.retireWidth)) + val csrwCounter = Output(UInt(parameter.nCtr.W)) val inhibitCycle = Output(Bool()) - val inst = Input(Vec(retireWidth, UInt(iLen.W))) - val mcontext = Output(UInt(coreParams.mcontextWidth.W)) - val scontext = Output(UInt(coreParams.scontextWidth.W)) + val inst = Input(Vec(parameter.retireWidth, UInt(parameter.iLen.W))) + val mcontext = Output(UInt(parameter.mcontextWidth.W)) + val scontext = Output(UInt(parameter.scontextWidth.W)) val fiom = Output(Bool()) - val vectorCsr = Option.when(usingVector)(Input(Bool())) - val wbRegRS2 = Option.when(usingVector)(Input(UInt())) + val vectorCsr = Option.when(parameter.usingVector)(Input(Bool())) + val wbRegRS2 = Option.when(parameter.usingVector)(Input(UInt(parameter.xLen.W))) + // @todo custom CSR + val customCSRs = Vec(parameter.customCSRSize, new CustomCSRIO(parameter.xLen)) } -/** - * 
https://github.com/riscv/riscv-isa-manual/blob/main/src/zicsr.adoc - */ -class CSRFile( - perfEventSets: EventSets = new EventSets(Seq()), - customCSRs: Seq[CustomCSR] = Nil, - hasBeu: Boolean -)( - implicit p: Parameters) - extends CoreModule()(p) - with HasCoreParameters { - val vector = Option.when(usingVector)(new csr.V(vLen, usingHypervisor)) +@instantiable +class CSR(val parameter: CSRParameter) + extends FixedIORawModule(new CSRInterface(parameter)) + with SerializableModule[CSRParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val perfEventSets: EventSets = new EventSets(Nil) + val customCSRs: Seq[CustomCSR] = Nil + // compatibility mode + // TODO: remove me. + def HLV_B = BitPat("b011000000000?????100?????1110011") + def HLV_BU = BitPat("b011000000001?????100?????1110011") + def HLV_D = BitPat("b011011000000?????100?????1110011") + def HLV_H = BitPat("b011001000000?????100?????1110011") + def HLV_HU = BitPat("b011001000001?????100?????1110011") + def HLV_W = BitPat("b011010000000?????100?????1110011") + def HLV_WU = BitPat("b011010000001?????100?????1110011") + def HLVX_HU = BitPat("b011001000011?????100?????1110011") + def HLVX_WU = BitPat("b011010000011?????100?????1110011") + def HSV_B = BitPat("b0110001??????????100000001110011") + def HSV_D = BitPat("b0110111??????????100000001110011") + def HSV_H = BitPat("b0110011??????????100000001110011") + def HSV_W = BitPat("b0110101??????????100000001110011") + def EBREAK = BitPat("b00000000000100000000000001110011") + def ECALL = BitPat("b00000000000000000000000001110011") + def MRET = BitPat("b00110000001000000000000001110011") + def WFI = BitPat("b00010000010100000000000001110011") + def DRET = BitPat("b01111011001000000000000001110011") + def SRET = BitPat("b00010000001000000000000001110011") + def SFENCE_VMA = BitPat("b0001001??????????000000001110011") + def HFENCE_VVMA = 
BitPat("b0010001??????????000000001110011") + def HFENCE_GVMA = BitPat("b0110001??????????000000001110011") + + // custom + def CEASE = BitPat("b00110000010100000000000001110011") + def MNRET = BitPat("b01110000001000000000000001110011") + def CFLUSH_D_L1 = BitPat("b111111000000?????000000001110011") + def Y = BitPat.Y() + def N = BitPat.N() + def X = BitPat.dontCare(1) + + val hasBeu: Boolean = parameter.hasBeu + val usingVector = parameter.usingVector + val customIsaExt = parameter.customIsaExt + val usingCompressed = parameter.usingCompressed + val vLen = parameter.vLen + val xLen = parameter.xLen + val fLen = parameter.fLen + val ppnBits = parameter.ppnBits + val asIdBits = parameter.asidBits + val vmIdBits = parameter.vmidBits + val nPMPs = parameter.nPMPs + val vpnBits = parameter.vpnBits + val useBPWatch = parameter.useBPWatch + val vaddrBits = parameter.vaddrBits + val paddrBits = parameter.paddrBits + val pmpGranularity = parameter.pmpGranularity + val usingHypervisor = parameter.usingHypervisor + val usingSupervisor = parameter.usingSupervisor + val usingVM = parameter.usingVM + val nLocalInterrupts: Int = parameter.nLocalInterrupts + val usingNMI = parameter.usingNMI + val usingFPU = parameter.usingFPU + val usingMulDiv = parameter.usingMulDiv + val usingAtomics = parameter.usingAtomics + val usingUser = parameter.usingUser + val vaddrBitsExtended = parameter.vaddrBitsExtended + val maxSVAddrBits = parameter.maxSVAddrBits + val nBreakpoints = parameter.nBreakpoints + val mtvecInit = parameter.mtvecInit + val nPerfCounters = parameter.nPerfCounters + val maxPAddrBits = parameter.maxPAddrBits + val pgIdxBits = parameter.pgIdxBits + val enableCommitLog = parameter.enableCommitLog + val usingDebug = parameter.usingDebug + val minPgLevels = parameter.minPgLevels + val pgLevels = parameter.pgLevels + val mtvecWritable = parameter.mtvecWritable + def pgLevelsToMode(i: Int) = (xLen, i) match { + case (32, 2) => 1 + case (64, x) if x >= 3 && x <= 6 => x + 5 + } + 
def write(fiom: Envcfg, wdata: UInt) { + val new_envcfg = wdata.asTypeOf(new Envcfg) + fiom := new_envcfg.fiom // only FIOM is writable currently + } + object CSR { + val busErrorIntCause = parameter.busErrorIntCause + val debugTriggerCause = parameter.debugTriggerCause + val firstHPM = parameter.firstHPM + val hpmWidth = parameter.hpmWidth + val debugIntCause = parameter.debugIntCause + val rnmiIntCause = parameter.rnmiIntCause + val rnmiBEUCause = parameter.rnmiBEUCause + val nHPM = parameter.nHPM + val firstHPE = parameter.firstHPE + val firstMHPC = parameter.firstMHPC + val firstHPC = parameter.firstHPC + val firstMHPCH = parameter.firstMHPCH + val firstHPCH = parameter.firstHPCH + val maxPMPs = parameter.maxPMPs + val N = parameter.N + val S = parameter.S + val C = parameter.C + val W = parameter.W + val I = parameter.I + def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) + def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) + val modeLSB = parameter.modeLSB + val firstCtr = parameter.firstCtr + val nCtr = parameter.nCtr + val firstCtrH = parameter.firstCtrH + } + object coreParams { + val mcontextWidth = parameter.mcontextWidth + val scontextWidth = parameter.scontextWidth + val useRVE = parameter.useRVE + val haveBasicCounters = parameter.haveBasicCounters + val haveCFlush = parameter.haveCFlush + val misaWritable = parameter.misaWritable + val haveFSDirty = parameter.haveFSDirty + } + def inRange(x: UInt, base: UInt, bounds: UInt) = x >= base && x < bounds + + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } + + // a counter that clock gates most of its MSBs using the LSB carry-out + case class WideCounter(width: Int, inc: 
UInt = 1.U, reset: Boolean = true, inhibit: Bool = false.B) { + private val isWide = width > (2 * inc.getWidth) + private val smallWidth = if (isWide) inc.getWidth.max(log2Up(width)) else width + private val small = if (reset) RegInit(0.U(smallWidth.W)) else Reg(UInt(smallWidth.W)) + private val nextSmall = small +& inc + when(!inhibit) { small := nextSmall } + + private val large = if (isWide) { + val r = if (reset) RegInit(0.U((width - smallWidth).W)) else Reg(UInt((width - smallWidth).W)) + when(nextSmall(smallWidth) && !inhibit) { r := r + 1.U } + r + } else null + + val value = if (isWide) Cat(large, small) else small + lazy val carryOut = { + val lo = (small ^ nextSmall) >> 1 + if (!isWide) + lo + else { + val hi = Mux(nextSmall(smallWidth), large ^ (large +& 1.U), 0.U) >> 1 + Cat(hi, lo) + } + } + + def assign(x: UInt) = { + small := x + if (isWide) large := x >> smallWidth + } + } + + // end - val io = IO(new CSRFileIO(hasBeu) { - val customCSRs = Vec(CSRFile.this.customCSRs.size, new CustomCSRIO) - }) + val vector = Option.when(usingVector)(new csr.V(vLen, usingHypervisor)) io.rwStall := false.B @@ -316,7 +476,7 @@ class CSRFile( val reg_dcsr = RegInit(reset_dcsr) val (supported_interrupts, delegable_interrupts) = { - val sup = Wire(new MIP) + val sup = Wire(new MIP(nLocalInterrupts)) sup.usip := false.B sup.ssip := usingSupervisor.B sup.vssip := usingHypervisor.B @@ -382,7 +542,7 @@ class CSRFile( ).map(1 << _).sum.U val (hs_delegable_interrupts, mideleg_always_hs) = { - val always = WireDefault(0.U.asTypeOf(new MIP())) + val always = WireDefault(0.U.asTypeOf(new MIP(nLocalInterrupts))) always.vssip := usingHypervisor.B always.vstip := usingHypervisor.B always.vseip := usingHypervisor.B @@ -396,15 +556,23 @@ class CSRFile( val reg_debug = RegInit(false.B) val reg_dpc = Reg(UInt(vaddrBitsExtended.W)) val reg_dscratch0 = Reg(UInt(xLen.W)) - val reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W))) + // val 
reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W))) + // @todo: optional, if debug base is not zero. + val reg_dscratch1 = Reg(UInt(parameter.xLen.W)) val reg_singleStepped = Reg(Bool()) - val reg_mcontext = (coreParams.mcontextWidth > 0).option(RegInit(0.U(coreParams.mcontextWidth.W))) - val reg_scontext = (coreParams.scontextWidth > 0).option(RegInit(0.U(coreParams.scontextWidth.W))) + val reg_mcontext = Option.when(coreParams.mcontextWidth > 0)(RegInit(0.U(coreParams.mcontextWidth.W))) + val reg_scontext = Option.when(coreParams.scontextWidth > 0)(RegInit(0.U(coreParams.scontextWidth.W))) val reg_tselect = Reg(UInt(log2Up(nBreakpoints).W)) - val reg_bp = Reg(Vec(1 << log2Up(nBreakpoints), new BP)) - val reg_pmp = Reg(Vec(nPMPs, new PMPReg)) + val reg_bp = Reg( + Vec( + 1 << log2Up(nBreakpoints), + new BP(xLen, useBPWatch, vaddrBits, coreParams.mcontextWidth, coreParams.scontextWidth) + ) + ) + + val reg_pmp = Reg(Vec(nPMPs, new PMPReg(paddrBits))) val reg_mie = Reg(UInt(xLen.W)) val (reg_mideleg, read_mideleg) = { @@ -415,7 +583,7 @@ class CSRFile( val reg = Reg(UInt(xLen.W)) (reg, Mux(usingSupervisor.B, reg & delegable_exceptions, 0.U)) } - val reg_mip = Reg(new MIP) + val reg_mip = Reg(new MIP(nLocalInterrupts)) val reg_mepc = Reg(UInt(vaddrBitsExtended.W)) val reg_mcause = RegInit(0.U(xLen.W)) val reg_mtval = Reg(UInt(vaddrBitsExtended.W)) @@ -450,7 +618,7 @@ class CSRFile( (reg, Mux(usingSupervisor.B, reg & delegable_counters, 0.U)) } - val (reg_hideleg, read_hideleg) = { + val (reg_hideleg: UInt, read_hideleg: UInt) = { val reg = Reg(UInt(xLen.W)) (reg, Mux(usingHypervisor.B, reg & hs_delegable_interrupts, 0.U)) } @@ -464,28 +632,28 @@ class CSRFile( (reg, Mux(usingHypervisor.B, reg & hs_delegable_counters, 0.U)) } val reg_hstatus = RegInit(0.U.asTypeOf(new HStatus)) - val reg_hgatp = Reg(new PTBR) + val reg_hgatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) val reg_htval = Reg(reg_mtval2.cloneType) val 
read_hvip = reg_mip.asUInt & hs_delegable_interrupts val read_hie = reg_mie & hs_delegable_interrupts val (reg_vstvec, read_vstvec) = { val reg = Reg(UInt(vaddrBitsExtended.W)) - (reg, formTVec(reg).sextTo(xLen)) + (reg, sextTo(formTVec(reg), xLen)) } val reg_vsstatus = Reg(new MStatus) val reg_vsscratch = Reg(Bits(xLen.W)) val reg_vsepc = Reg(UInt(vaddrBitsExtended.W)) val reg_vscause = Reg(Bits(xLen.W)) val reg_vstval = Reg(UInt(vaddrBitsExtended.W)) - val reg_vsatp = Reg(new PTBR) + val reg_vsatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) val reg_sepc = Reg(UInt(vaddrBitsExtended.W)) val reg_scause = Reg(Bits(xLen.W)) val reg_stval = Reg(UInt(vaddrBitsExtended.W)) val reg_sscratch = Reg(Bits(xLen.W)) val reg_stvec = Reg(UInt((if (usingHypervisor) vaddrBitsExtended else vaddrBits).W)) - val reg_satp = Reg(new PTBR) + val reg_satp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) val reg_wfi = withClock(io.ungatedClock) { RegInit(false.B) } val reg_fflags = Reg(UInt(5.W)) @@ -505,26 +673,26 @@ class CSRFile( } val mip = WireDefault(reg_mip) - mip.lip := (io.interrupts.lip: Seq[Bool]) - mip.mtip := io.interrupts.mtip - mip.msip := io.interrupts.msip - mip.meip := io.interrupts.meip + mip.lip := io.interrupts.tileInterrupts.lip + mip.mtip := io.interrupts.tileInterrupts.mtip + mip.msip := io.interrupts.tileInterrupts.msip + mip.meip := io.interrupts.tileInterrupts.meip // seip is the OR of reg_mip.seip and the actual line from the PLIC - io.interrupts.seip.foreach { mip.seip := reg_mip.seip || _ } + io.interrupts.tileInterrupts.seip.foreach { mip.seip := reg_mip.seip || _ } // Simimlar sort of thing would apply if the PLIC had a VSEIP line: //io.interrupts.vseip.foreach { mip.vseip := reg_mip.vseip || _ } val read_mip = mip.asUInt & supported_interrupts val read_hip = read_mip & hs_delegable_interrupts val high_interrupts = (if (usingNMI) 0.U else io.interrupts.buserror.map(_ << CSR.busErrorIntCause).getOrElse(0.U)) - val pending_interrupts = high_interrupts | 
(read_mip & reg_mie) - val d_interrupts = io.interrupts.debug << CSR.debugIntCause - val (nmi_interrupts, nmiFlag) = io.interrupts.nmi + val pending_interrupts: UInt = high_interrupts | (read_mip & reg_mie) + val d_interrupts: UInt = io.interrupts.tileInterrupts.debug << CSR.debugIntCause + val (nmi_interrupts: UInt, nmiFlag: Bool) = io.interrupts.tileInterrupts.nmi .map(nmi => ( ((nmi.rnmi && reg_rnmie) << CSR.rnmiIntCause) | io.interrupts.buserror.map(_ << CSR.rnmiBEUCause).getOrElse(0.U), - !io.interrupts.debug && nmi.rnmi && reg_rnmie + !io.interrupts.tileInterrupts.debug && nmi.rnmi && reg_rnmie ) ) .getOrElse(0.U, false.B) @@ -544,14 +712,25 @@ class CSRFile( Seq(vs_interrupts, s_interrupts, m_interrupts, nmi_interrupts, d_interrupts) ) val interruptMSB = BigInt(1) << (xLen - 1) - val interruptCause = interruptMSB.U + (nmiFlag << (xLen - 2)) + whichInterrupt + val interruptCause: UInt = interruptMSB.U + ((nmiFlag << (xLen - 2)): UInt) + whichInterrupt io.interrupt := (anyInterrupt && !io.singleStep || reg_singleStepped) && !(reg_debug || io.status.cease) io.interruptCause := interruptCause io.bp := reg_bp.take(nBreakpoints) io.mcontext := reg_mcontext.getOrElse(0.U) io.scontext := reg_scontext.getOrElse(0.U) io.fiom := (reg_mstatus.prv < PRV.M.U && reg_menvcfg.fiom) || (reg_mstatus.prv < PRV.S.U && reg_senvcfg.fiom) || (reg_mstatus.v && reg_henvcfg.fiom) - io.pmp := reg_pmp.map(PMP(_)) + def genPMP(reg: PMPReg): PMP = { + val pmp = Wire(new PMP(parameter.paddrBits)) + pmp.cfg := reg.cfg + pmp.addr := reg.addr + def computeMask(pmp: PMP) = { + val base = Cat(pmp.addr, pmp.cfg.a(0)) | ((parameter.pmpGranularity - 1).U >> PMP.lgAlign) + Cat(base & ~(base + 1.U), ((1 << PMP.lgAlign) - 1).U) + } + pmp.mask := computeMask(pmp) + pmp + } + io.pmp := reg_pmp.map(genPMP) val isaMaskString = (if (usingMulDiv) "M" else "") + @@ -569,13 +748,13 @@ class CSRFile( val isaMax = (BigInt(log2Ceil(xLen) - 4) << (xLen - 2)) | isaStringToMask(isaString) val reg_misa = 
RegInit(isaMax.U) val read_mstatus = io.status.asUInt - val read_mtvec = formTVec(reg_mtvec).padTo(xLen) - val read_stvec = formTVec(reg_stvec).sextTo(xLen) + val read_mtvec = padTo(formTVec(reg_mtvec), xLen) + val read_stvec = sextTo(formTVec(reg_stvec), xLen) - val read_mapping = LinkedHashMap[Int, Bits]( + val read_mapping = mutable.LinkedHashMap[Int, Bits]( CSRs.tselect -> reg_tselect, CSRs.tdata1 -> reg_bp(reg_tselect).control.asUInt, - CSRs.tdata2 -> reg_bp(reg_tselect).address.sextTo(xLen), + CSRs.tdata2 -> sextTo(reg_bp(reg_tselect).address, xLen), CSRs.tdata3 -> reg_bp(reg_tselect).textra.asUInt, CSRs.misa -> reg_misa, CSRs.mstatus -> read_mstatus, @@ -583,45 +762,45 @@ class CSRFile( CSRs.mip -> read_mip, CSRs.mie -> reg_mie, CSRs.mscratch -> reg_mscratch, - CSRs.mepc -> readEPC(reg_mepc).sextTo(xLen), - CSRs.mtval -> reg_mtval.sextTo(xLen), + CSRs.mepc -> sextTo(readEPC(reg_mepc), xLen), + CSRs.mtval -> sextTo(reg_mtval, xLen), CSRs.mcause -> reg_mcause, CSRs.mhartid -> io.hartid ) val debug_csrs = - if (!usingDebug) LinkedHashMap() + if (!usingDebug) mutable.LinkedHashMap() else - LinkedHashMap[Int, Bits]( + mutable.LinkedHashMap[Int, Bits]( CSRs.dcsr -> reg_dcsr.asUInt, - CSRs.dpc -> readEPC(reg_dpc).sextTo(xLen), - CSRs.dscratch0 -> reg_dscratch0.asUInt - ) ++ - reg_dscratch1.map(r => CSRs.dscratch1 -> r) + CSRs.dpc -> sextTo(readEPC(reg_dpc), xLen), + CSRs.dscratch0 -> reg_dscratch0.asUInt, + CSRs.dscratch1 -> reg_dscratch1.asUInt + ) val read_mnstatus = WireInit(0.U.asTypeOf(new MNStatus())) read_mnstatus.mpp := reg_mnstatus.mpp read_mnstatus.mpv := reg_mnstatus.mpv read_mnstatus.mie := reg_rnmie val nmi_csrs = - if (!usingNMI) LinkedHashMap() + if (!usingNMI) mutable.LinkedHashMap() else - LinkedHashMap[Int, Bits]( + mutable.LinkedHashMap[Int, Bits]( CustomCSRs.mnscratch -> reg_mnscratch, - CustomCSRs.mnepc -> readEPC(reg_mnepc).sextTo(xLen), + CustomCSRs.mnepc -> sextTo(readEPC(reg_mnepc), xLen), CustomCSRs.mncause -> reg_mncause, 
CustomCSRs.mnstatus -> read_mnstatus.asUInt ) - val context_csrs = LinkedHashMap[Int, Bits]() ++ + val context_csrs = mutable.LinkedHashMap[Int, Bits]() ++ reg_mcontext.map(r => CSRs.mcontext -> r) ++ reg_scontext.map(r => CSRs.scontext -> r) val read_fcsr = Cat(reg_frm, reg_fflags) - val fp_csrs = LinkedHashMap[Int, Bits]() ++ - usingFPU.option(CSRs.fflags -> reg_fflags) ++ - usingFPU.option(CSRs.frm -> reg_frm) ++ - usingFPU.option(CSRs.fcsr -> read_fcsr) + val fp_csrs = mutable.LinkedHashMap[Int, Bits]() ++ + Option.when(usingFPU)(CSRs.fflags -> reg_fflags) ++ + Option.when(usingFPU)(CSRs.frm -> reg_frm) ++ + Option.when(usingFPU)(CSRs.fcsr -> read_fcsr) read_mapping ++= debug_csrs read_mapping ++= nmi_csrs @@ -630,7 +809,7 @@ class CSRFile( // Vector read CSR logic injection vector.foreach { v => - read_mapping ++= LinkedHashMap[Int, Bits]( + read_mapping ++= mutable.LinkedHashMap[Int, Bits]( CSRs.vxsat -> v.states("vxsat"), CSRs.vxrm -> v.states("vxrm"), CSRs.vcsr -> v.states("vxrm") ## v.states("vxsat"), @@ -644,13 +823,13 @@ class CSRFile( if (coreParams.haveBasicCounters) { read_mapping += CSRs.mcountinhibit -> reg_mcountinhibit - read_mapping += CSRs.mcycle -> reg_cycle - read_mapping += CSRs.minstret -> reg_instret + read_mapping += CSRs.mcycle -> reg_cycle.value + read_mapping += CSRs.minstret -> reg_instret.value for ( ((e, c), i) <- (reg_hpmevent .padTo(CSR.nHPM, 0.U) - .zip(reg_hpmcounter.map(x => x: UInt).padTo(CSR.nHPM, 0.U))) + .zip(reg_hpmcounter.map(x => x.value).padTo(CSR.nHPM, 0.U))) .zipWithIndex ) { read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN @@ -665,14 +844,14 @@ class CSRFile( if (usingUser) { read_mapping += CSRs.mcounteren -> read_mcounteren } - read_mapping += CSRs.cycle -> reg_cycle - read_mapping += CSRs.instret -> reg_instret + read_mapping += CSRs.cycle -> reg_cycle.value + read_mapping += CSRs.instret -> reg_instret.value if (xLen == 32) { - read_mapping += CSRs.mcycleh -> (reg_cycle >> 32) - read_mapping += CSRs.minstreth 
-> (reg_instret >> 32) - read_mapping += CSRs.cycleh -> (reg_cycle >> 32) - read_mapping += CSRs.instreth -> (reg_instret >> 32) + read_mapping += CSRs.mcycleh -> (reg_cycle.value >> 32) + read_mapping += CSRs.minstreth -> (reg_instret.value >> 32) + read_mapping += CSRs.cycleh -> (reg_cycle.value >> 32) + read_mapping += CSRs.instreth -> (reg_instret.value >> 32) } } @@ -683,7 +862,7 @@ class CSRFile( } val sie_mask = { - val sgeip_mask = WireInit(0.U.asTypeOf(new MIP)) + val sgeip_mask = WireInit(0.U.asTypeOf(new MIP(nLocalInterrupts))) sgeip_mask.sgeip := true.B read_mideleg & ~(hs_delegable_interrupts | sgeip_mask.asUInt) } @@ -708,9 +887,9 @@ class CSRFile( read_mapping += CSRs.sie -> read_sie.asUInt read_mapping += CSRs.sscratch -> reg_sscratch read_mapping += CSRs.scause -> reg_scause - read_mapping += CSRs.stval -> reg_stval.sextTo(xLen) + read_mapping += CSRs.stval -> sextTo(reg_stval, xLen) read_mapping += CSRs.satp -> reg_satp.asUInt - read_mapping += CSRs.sepc -> readEPC(reg_sepc).sextTo(xLen) + read_mapping += CSRs.sepc -> sextTo(readEPC(reg_sepc), xLen) read_mapping += CSRs.stvec -> read_stvec read_mapping += CSRs.scounteren -> read_scounteren read_mapping += CSRs.mideleg -> read_mideleg @@ -722,11 +901,20 @@ class CSRFile( def pmpCfgIndex(i: Int) = (xLen / 32) * (i / pmpCfgPerCSR) if (reg_pmp.nonEmpty) { require(reg_pmp.size <= CSR.maxPMPs) - val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMP)) + // TODO: rc bug. 
+ val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMPReg(paddrBits))) for (i <- 0 until read_pmp.size by pmpCfgPerCSR) - read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).asUInt - for ((pmp, i) <- read_pmp.zipWithIndex) - read_mapping += (CSRs.pmpaddr0 + i) -> pmp.readAddr + read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> Cat( + read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).reverse.map(_.asUInt) + ).asUInt + for ((pmp, i) <- read_pmp.zipWithIndex) { + def pmpReadAddr(x: PMPReg) = if (log2Ceil(pmpGranularity) == PMP.lgAlign) x.addr + else { + val mask = ((BigInt(1) << (log2Ceil(pmpGranularity) - PMP.lgAlign)) - 1).U + Mux(PMP.napot(x), x.addr | (mask >> 1), ~(~x.addr | mask)) + } + read_mapping += (CSRs.pmpaddr0 + i) -> pmpReadAddr(pmp) + } } // implementation-defined CSRs @@ -746,7 +934,7 @@ class CSRFile( read_mapping += CSRs.mtinst -> 0.U read_mapping += CSRs.mtval2 -> reg_mtval2 - val read_hstatus = io.hstatus.asUInt.extract(xLen - 1, 0) + val read_hstatus = io.hstatus.asUInt(xLen - 1, 0) read_mapping += CSRs.hstatus -> read_hstatus read_mapping += CSRs.hedeleg -> read_hedeleg @@ -766,9 +954,9 @@ class CSRFile( val read_vsie = (read_hie & read_hideleg) >> 1 val read_vsip = (read_hip & read_hideleg) >> 1 - val read_vsepc = readEPC(reg_vsepc).sextTo(xLen) - val read_vstval = reg_vstval.sextTo(xLen) - val read_vsstatus = io.gstatus.asUInt.extract(xLen - 1, 0) + val read_vsepc = sextTo(readEPC(reg_vsepc), xLen) + val read_vstval = sextTo(reg_vstval, xLen) + val read_vsstatus = io.gstatus.asUInt(xLen - 1, 0) read_mapping += CSRs.vsstatus -> read_vsstatus read_mapping += CSRs.vsip -> read_vsip @@ -813,13 +1001,13 @@ class CSRFile( CEASE -> List(N, N, N, Y, N, N, N, N, N), WFI -> List(N, N, N, N, Y, N, N, N, N) ) ++ - usingDebug.option(DRET -> List(N, N, Y, N, N, N, N, N, N)) ++ - usingNMI.option(MNRET -> List(N, N, Y, N, N, N, N, N, N)) ++ - coreParams.haveCFlush.option(CFLUSH_D_L1 -> List(N, N, N, N, N, 
N, N, N, N)) ++ - usingSupervisor.option(SRET -> List(N, N, Y, N, N, N, N, N, N)) ++ - usingVM.option(SFENCE_VMA -> List(N, N, N, N, N, Y, N, N, N)) ++ - usingHypervisor.option(HFENCE_VVMA -> List(N, N, N, N, N, N, Y, N, N)) ++ - usingHypervisor.option(HFENCE_GVMA -> List(N, N, N, N, N, N, N, Y, N)) ++ + Option.when(usingDebug)(DRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(usingNMI)(MNRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(coreParams.haveCFlush)(CFLUSH_D_L1 -> List(N, N, N, N, N, N, N, N, N)) ++ + Option.when(usingSupervisor)(SRET -> List(N, N, Y, N, N, N, N, N, N)) ++ + Option.when(usingVM)(SFENCE_VMA -> List(N, N, N, N, N, Y, N, N, N)) ++ + Option.when(usingHypervisor)(HFENCE_VVMA -> List(N, N, N, N, N, N, Y, N, N)) ++ + Option.when(usingHypervisor)(HFENCE_GVMA -> List(N, N, N, N, N, N, N, Y, N)) ++ (if (usingHypervisor) hlsv.map(_ -> List(N, N, N, N, N, N, N, N, Y)) else Seq()) val insn_call :: insn_break :: insn_ret :: insn_cease :: insn_wfi :: _ :: _ :: _ :: _ :: Nil = { val insn = ECALL.value.U | (io.rw.addr << 20) @@ -829,15 +1017,17 @@ class CSRFile( for (io_dec <- io.decode) { val addr = io_dec.inst(31, 20) - def decodeAny(m: LinkedHashMap[Int, Bits]): Bool = m.map { case (k: Int, _: Bits) => addr === k.U }.reduce(_ || _) - def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U)) + def decodeAny(m: mutable.LinkedHashMap[Int, Bits]): Bool = + m.map { case (k: Int, _: Bits) => addr === k.U }.reduce(_ || _) + def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U)) val _ :: is_break :: is_ret :: _ :: is_wfi :: is_sfence :: is_hfence_vvma :: is_hfence_gvma :: is_hlsv :: Nil = DecodeLogic(io_dec.inst, decode_table(0)._2.map(x => X), decode_table).map(_.asBool) - val is_counter = (addr.inRange(CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) || addr.inRange( + val is_counter = inRange(addr, CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) 
|| inRange( + addr, CSR.firstCtrH.U, (CSR.firstCtrH + CSR.nCtr).U - )) + ) val allow_wfi = (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !reg_mstatus.tw && (!reg_mstatus.v || !reg_hstatus.vtw) @@ -898,7 +1088,7 @@ class CSRFile( is_sfence && (!reg_mstatus.prv(0) || reg_hstatus.vtvm)) } - val cause = + val cause: UInt = Mux( insn_call, Causes.user_ecall.U + Mux(reg_mstatus.prv(0) && reg_mstatus.v, PRV.H.U, reg_mstatus.prv), @@ -913,8 +1103,8 @@ class CSRFile( ) val trapToDebug = usingDebug.B && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug) - val debugEntry = p(DebugModuleKey).map(_.debugEntry).getOrElse(BigInt(0x800)) - val debugException = p(DebugModuleKey).map(_.debugException).getOrElse(BigInt(0x808)) + val debugEntry = parameter.debugEntry.getOrElse(BigInt(0x800)) + val debugException = parameter.debugException.getOrElse(BigInt(0x808)) val debugTVec = Mux(reg_debug, Mux(insn_break, debugEntry.U, debugException.U), debugEntry.U) val delegate = usingSupervisor.B && reg_mstatus.prv <= PRV.S.U && Mux( cause(xLen - 1), @@ -939,8 +1129,8 @@ class CSRFile( cause(xLen - 1) && cause(xLen - 2) && (cause_lsbs === CSR.rnmiIntCause.U || cause_lsbs === CSR.rnmiBEUCause.U) val causeIsRnmiBEU = cause(xLen - 1) && cause(xLen - 2) && cause_lsbs === CSR.rnmiBEUCause.U val causeIsNmi = causeIsRnmiInt - val nmiTVecInt = io.interrupts.nmi.map(nmi => nmi.rnmi_interrupt_vector).getOrElse(0.U) - val nmiTVecXcpt = io.interrupts.nmi.map(nmi => nmi.rnmi_exception_vector).getOrElse(0.U) + val nmiTVecInt = io.interrupts.tileInterrupts.nmi.map(nmi => nmi.rnmi_interrupt_vector).getOrElse(0.U) + val nmiTVecXcpt = io.interrupts.tileInterrupts.nmi.map(nmi => nmi.rnmi_exception_vector).getOrElse(0.U) val trapToNmiInt = usingNMI.B && causeIsNmi val trapToNmiXcpt = usingNMI.B && !nmie val trapToNmi = trapToNmiInt || trapToNmiXcpt @@ -979,8 +1169,8 @@ class CSRFile( ) when(insn_wfi && !io.singleStep && !reg_debug) { reg_wfi := true.B } - 
when(pending_interrupts.orR || io.interrupts.debug || exception) { reg_wfi := false.B } - io.interrupts.nmi.map(nmi => when(nmi.rnmi) { reg_wfi := false.B }) + when(pending_interrupts.orR || io.interrupts.tileInterrupts.debug || exception) { reg_wfi := false.B } + io.interrupts.tileInterrupts.nmi.map(nmi => when(nmi.rnmi) { reg_wfi := false.B }) when(io.retire(0) || exception) { reg_singleStepped := true.B } when(!io.singleStep) { reg_singleStepped := false.B } @@ -1056,8 +1246,8 @@ class CSRFile( val en = exception && (supported_interrupts & (BigInt(1) << i).U) =/= 0.U && cause === (BigInt(1) << (xLen - 1)).U + i.U val delegable = (delegable_interrupts & (BigInt(1) << i).U) =/= 0.U - property.cover(en && !delegate, s"INTERRUPT_M_$i") - property.cover(en && delegable && delegate, s"INTERRUPT_S_$i") + // property.cover(en && !delegate, s"INTERRUPT_M_$i") + // property.cover(en && delegable && delegate, s"INTERRUPT_S_$i") } for (i <- 0 until xLen) { val supported_exceptions: BigInt = 0x8fe | @@ -1068,8 +1258,8 @@ class CSRFile( if (((supported_exceptions >> i) & 1) != 0) { val en = exception && cause === i.U val delegable = (delegable_exceptions & (BigInt(1) << i).U) =/= 0.U - property.cover(en && !delegate, s"EXCEPTION_M_$i") - property.cover(en && delegable && delegate, s"EXCEPTION_S_$i") + // property.cover(en && !delegate, s"EXCEPTION_M_$i") + // property.cover(en && delegable && delegate, s"EXCEPTION_S_$i") } } @@ -1118,7 +1308,7 @@ class CSRFile( } } - io.time := reg_cycle + io.time := reg_cycle.value io.csrStall := reg_wfi || io.status.cease io.status.cease := RegEnable(true.B, false.B, insn_cease) io.status.wfi := reg_wfi @@ -1137,23 +1327,23 @@ class CSRFile( case (k, _) => k >= CSR.firstHPC + nPerfCounters && k < CSR.firstHPC + CSR.nHPM } - coverable_counters.foreach({ - case (k, v) => { - when(!k.U(11, 10).andR) { // Cover points for RW CSR registers - property.cover( - io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr === k.U, - "CSR_access_" + 
k.toString, - "Cover Accessing Core CSR field" - ) - }.otherwise { // Cover points for RO CSR registers - property.cover( - io.rw.cmd === CSR.R && io.rw.addr === k.U, - "CSR_access_" + k.toString, - "Cover Accessing Core CSR field" - ) - } - } - }) +// coverable_counters.foreach({ +// case (k, v) => { +// when(!k.U(11, 10).andR) { // Cover points for RW CSR registers +// property.cover( +// io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr === k.U, +// "CSR_access_" + k.toString, +// "Cover Accessing Core CSR field" +// ) +// }.otherwise { // Cover points for RO CSR registers +// property.cover( +// io.rw.cmd === CSR.R && io.rw.addr === k.U, +// "CSR_access_" + k.toString, +// "Cover Accessing Core CSR field" +// ) +// } +// } +// }) val set_fs_dirty = WireDefault(io.setFsDirty.getOrElse(false.B)) if (coreParams.haveFSDirty) { @@ -1170,18 +1360,24 @@ class CSRFile( set_fs_dirty := true.B } - val csr_wen = io.rw.cmd.isOneOf(CSR.S, CSR.C, CSR.W) && !io.rwStall + val csr_wen = isOneOf(io.rw.cmd, Seq(CSR.S, CSR.C, CSR.W)) && !io.rwStall io.csrwCounter := Mux( - coreParams.haveBasicCounters.B && csr_wen && (io.rw.addr.inRange( + coreParams.haveBasicCounters.B && csr_wen && inRange( + io.rw.addr, CSRs.mcycle.U, (CSRs.mcycle + CSR.nCtr).U - ) || io.rw.addr.inRange(CSRs.mcycleh.U, (CSRs.mcycleh + CSR.nCtr).U)), + ) || inRange( + io.rw.addr, + CSRs.mcycleh.U, + (CSRs.mcycleh + CSR.nCtr).U + ), UIntToOH(io.rw.addr(log2Ceil(CSR.nCtr + nPerfCounters) - 1, 0)), 0.U ) when(csr_wen) { val scause_mask = ((BigInt(1) << (xLen - 1)) + 31).U /* only implement 5 LSBs and MSB */ - val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(new PTBR().pgLevelsToMode(_)) + + val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(pgLevelsToMode) when(decoded_addr(CSRs.mstatus)) { val new_mstatus = wdata.asTypeOf(new MStatus()) @@ -1227,7 +1423,7 @@ class CSRFile( // in read_mip, since read_mip.seip is the OR of reg_mip.seip and // io.interrupts.seip. 
We don't want the value on the PLIC line to // inadvertently be OR'd into read_mip.seip. - val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP) + val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP(nLocalInterrupts)) if (usingSupervisor) { reg_mip.ssip := new_mip.ssip reg_mip.stip := new_mip.stip @@ -1292,9 +1488,10 @@ class CSRFile( } when(decoded_addr(CSRs.dpc)) { reg_dpc := formEPC(wdata) } when(decoded_addr(CSRs.dscratch0)) { reg_dscratch0 := wdata } - reg_dscratch1.foreach { r => - when(decoded_addr(CSRs.dscratch1)) { r := wdata } - } + // reg_dscratch1.foreach { r => + // when(decoded_addr(CSRs.dscratch1)) { r := wdata } + // } + when(decoded_addr(CSRs.dscratch1)) { reg_dscratch1 := wdata } } if (usingSupervisor) { when(decoded_addr(CSRs.sstatus)) { @@ -1309,13 +1506,13 @@ class CSRFile( } } when(decoded_addr(CSRs.sip)) { - val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP()) + val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP(nLocalInterrupts)) reg_mip.ssip := new_sip.ssip } when(decoded_addr(CSRs.satp)) { if (usingVM) { - val new_satp = wdata.asTypeOf(new PTBR()) - when(new_satp.mode.isOneOf(satp_valid_modes.map(_.U))) { + val new_satp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + when(isOneOf(new_satp.mode, satp_valid_modes.map(_.U))) { reg_satp.mode := new_satp.mode & satp_valid_modes.reduce(_ | _).U reg_satp.ppn := new_satp.ppn(ppnBits - 1, 0) if (asIdBits > 0) reg_satp.asid := new_satp.asid(asIdBits - 1, 0) @@ -1331,7 +1528,7 @@ class CSRFile( when(decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata } when(decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata } when(decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata } - when(decoded_addr(CSRs.senvcfg)) { reg_senvcfg.write(wdata) } + when(decoded_addr(CSRs.senvcfg)) { write(reg_senvcfg, wdata) } } if (usingHypervisor) { @@ -1349,23 +1546,25 @@ 
class CSRFile( when(decoded_addr(CSRs.hideleg)) { reg_hideleg := wdata } when(decoded_addr(CSRs.hedeleg)) { reg_hedeleg := wdata } when(decoded_addr(CSRs.hgatp)) { - val new_hgatp = wdata.asTypeOf(new PTBR()) - val valid_modes = 0 +: (minPgLevels to pgLevels).map(new_hgatp.pgLevelsToMode(_)) - when(new_hgatp.mode.isOneOf(valid_modes.map(_.U))) { + val new_hgatp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val valid_modes = 0 +: (minPgLevels to pgLevels).map(pgLevelsToMode) + when(isOneOf(new_hgatp.mode, valid_modes.map(_.U))) { reg_hgatp.mode := new_hgatp.mode & valid_modes.reduce(_ | _).U } reg_hgatp.ppn := Cat(new_hgatp.ppn(ppnBits - 1, 2), 0.U(2.W)) if (vmIdBits > 0) reg_hgatp.asid := new_hgatp.asid(vmIdBits - 1, 0) } when(decoded_addr(CSRs.hip)) { - val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP()) + val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)) + .asTypeOf(new MIP(nLocalInterrupts)) reg_mip.vssip := new_hip.vssip } when(decoded_addr(CSRs.hie)) { reg_mie := (reg_mie & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts) } when(decoded_addr(CSRs.hvip)) { - val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP()) + val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)) + .asTypeOf(new MIP(nLocalInterrupts)) reg_mip.vssip := new_sip.vssip reg_mip.vstip := new_sip.vstip reg_mip.vseip := new_sip.vseip @@ -1384,12 +1583,12 @@ class CSRFile( reg_vsstatus.fs := formFS(new_vsstatus.fs) } when(decoded_addr(CSRs.vsip)) { - val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP()) + val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP(nLocalInterrupts)) reg_mip.vssip := new_vsip.vssip } when(decoded_addr(CSRs.vsatp)) { - val new_vsatp = wdata.asTypeOf(new PTBR()) - val mode_ok = 
new_vsatp.mode.isOneOf(satp_valid_modes.map(_.U)) + val new_vsatp = wdata.asTypeOf(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val mode_ok = isOneOf(new_vsatp.mode, satp_valid_modes.map(_.U)) when(mode_ok) { reg_vsatp.mode := new_vsatp.mode & satp_valid_modes.reduce(_ | _).U } @@ -1404,11 +1603,11 @@ class CSRFile( when(decoded_addr(CSRs.vstvec)) { reg_vstvec := wdata } when(decoded_addr(CSRs.vscause)) { reg_vscause := wdata & scause_mask } when(decoded_addr(CSRs.vstval)) { reg_vstval := wdata } - when(decoded_addr(CSRs.henvcfg)) { reg_henvcfg.write(wdata) } + when(decoded_addr(CSRs.henvcfg)) { write(reg_henvcfg, wdata) } } if (usingUser) { when(decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata } - when(decoded_addr(CSRs.menvcfg)) { reg_menvcfg.write(wdata) } + when(decoded_addr(CSRs.menvcfg)) { write(reg_menvcfg, wdata) } } if (nBreakpoints > 0) { when(decoded_addr(CSRs.tselect)) { reg_tselect := wdata } @@ -1418,12 +1617,12 @@ class CSRFile( when(decoded_addr(CSRs.tdata2)) { bp.address := wdata } when(decoded_addr(CSRs.tdata3)) { if (coreParams.mcontextWidth > 0) { - bp.textra.mselect := wdata(bp.textra.mselectPos) - bp.textra.mvalue := wdata >> bp.textra.mvaluePos + bp.textra.mselect := wdata(TExtra.mselectPos(xLen)) + bp.textra.mvalue := wdata >> TExtra.mvaluePos(xLen) } if (coreParams.scontextWidth > 0) { - bp.textra.sselect := wdata(bp.textra.sselectPos) - bp.textra.svalue := wdata >> bp.textra.svaluePos + bp.textra.sselect := wdata(TExtra.sselectPos) + bp.textra.svalue := wdata >> TExtra.svaluePos } } when(decoded_addr(CSRs.tdata1)) { @@ -1448,16 +1647,19 @@ class CSRFile( reg_scontext.foreach { r => when(decoded_addr(CSRs.scontext)) { r := wdata } } if (reg_pmp.nonEmpty) for (((pmp, next), i) <- (reg_pmp.zip(reg_pmp.tail :+ reg_pmp.last)).zipWithIndex) { require(xLen % pmp.cfg.getWidth == 0) - when(decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !pmp.cfgLocked) { + def cfgLocked(pmpReg: PMPReg) = pmpReg.cfg.l + def addrLocked(pmpReg: PMPReg, next: 
PMPReg) = pmpReg.cfg.l + + when(decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !cfgLocked(pmp)) { val newCfg = (wdata >> ((i * pmp.cfg.getWidth) % xLen)).asTypeOf(new PMPConfig()) pmp.cfg := newCfg // disallow unreadable but writable PMPs pmp.cfg.w := newCfg.w && newCfg.r // can't select a=NA4 with coarse-grained PMPs - if (pmpGranularity.log2 > PMP.lgAlign) + if (log2Ceil(pmpGranularity) > PMP.lgAlign) pmp.cfg.a := Cat(newCfg.a(1), newCfg.a.orR) } - when(decoded_addr(CSRs.pmpaddr0 + i) && !pmp.addrLocked(next)) { + when(decoded_addr(CSRs.pmpaddr0 + i) && !addrLocked(pmp, next)) { pmp.addr := wdata } } @@ -1483,19 +1685,23 @@ class CSRFile( val rs1IsZero = io.inst(0)(19, 15) === 0.U val rdIsZero = io.inst(0)(11, 7) === 0.U // v type set - val newVType = Mux1H(Seq( - (vsetvli || vsetivli) -> io.inst(0)(27, 20), - vsetvl -> io.wbRegRS2.get(7, 0) - )) + val newVType = Mux1H( + Seq( + (vsetvli || vsetivli) -> io.inst(0)(27, 20), + vsetvl -> io.wbRegRS2.get(7, 0) + ) + ) // vlmax = vlen * lmul / sew val vlmax: UInt = (true.B << (log2Ceil(vLen) - 6) << (newVType(2, 0) + 3.U) >> newVType(5, 3)).asUInt // set vl - val setVL = Mux1H(Seq( - ((vsetvli || vsetvl) && !rs1IsZero) -> Mux(io.rw.wdata > vlmax, vlmax, io.rw.wdata), - ((vsetvli || vsetvl) && rs1IsZero && !rdIsZero) -> vlmax, - ((vsetvli || vsetvl) && rs1IsZero && rdIsZero) -> vector.get.states("vl"), - vsetivli -> io.inst(0)(19, 15) - )) + val setVL = Mux1H( + Seq( + ((vsetvli || vsetvl) && !rs1IsZero) -> Mux(io.rw.wdata > vlmax, vlmax, io.rw.wdata), + ((vsetvli || vsetvl) && rs1IsZero && !rdIsZero) -> vlmax, + ((vsetvli || vsetvl) && rs1IsZero && rdIsZero) -> vector.get.states("vl"), + vsetivli -> io.inst(0)(19, 15) + ) + ) setVlReadData := Mux(io.retire(0) && io.vectorCsr.getOrElse(false.B), setVL, 0.U) when(io.retire(0) && io.vectorCsr.get) { vector.get.states("vl") := setVL @@ -1517,7 +1723,7 @@ class CSRFile( setCustomCSR(io, csr, reg) } - when(reset.asBool) { + when(io.reset.asBool) { reg_satp.mode := 0.U 
reg_vsatp.mode := 0.U reg_hgatp.mode := 0.U @@ -1546,15 +1752,19 @@ class CSRFile( if (nBreakpoints <= 1) reg_tselect := 0.U for (bpc <- reg_bp.map { _.control }) { - bpc.ttype := bpc.tType.U - bpc.maskmax := bpc.maskMax.U + def tType = 2 + def maskMax = 4 + // bpc.ttype := bpc.tType.U + bpc.ttype := tType.U + // bpc.maskmax := bpc.maskMax.U + bpc.maskmax := maskMax.U bpc.reserved := 0.U bpc.zero := 0.U bpc.h := false.B if (!usingSupervisor) bpc.s := false.B if (!usingUser) bpc.u := false.B if (!usingSupervisor && !usingUser) bpc.m := true.B - when(reset.asBool) { + when(io.reset.asBool) { bpc.action := 0.U bpc.dmode := false.B bpc.chain := false.B @@ -1568,10 +1778,14 @@ class CSRFile( if (coreParams.scontextWidth == 0) bpx.sselect := false.B } for (bp <- reg_bp.drop(nBreakpoints)) - bp := 0.U.asTypeOf(new BP()) + bp := 0.U.asTypeOf(new BP(xLen, useBPWatch, vaddrBits, coreParams.mcontextWidth, coreParams.scontextWidth)) for (pmp <- reg_pmp) { pmp.cfg.res := 0.U - when(reset.asBool) { pmp.reset() } + def resetPMP(pmp: PMPReg): Unit = { + pmp.cfg.a := 0.U + pmp.cfg.l := 0.U + } + when(io.reset.asBool) { resetPMP(pmp) } } def chooseInterrupt(masksIn: Seq[UInt]): (Bool, UInt) = { @@ -1601,15 +1815,17 @@ class CSRFile( def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = { if (xLen == 32) { val hi = lo + CSRs.mcycleh - CSRs.mcycle - when(decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth - 1, 32), wdata) } - when(decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth - 33, 0), ctr(31, 0)) } + when(decoded_addr(lo)) { ctr.assign(Cat(ctr.value(ctr.width - 1, 32), wdata)) } + when(decoded_addr(hi)) { ctr.assign(Cat(wdata(ctr.width - 33, 0), ctr.value(31, 0))) } } else { - when(decoded_addr(lo)) { ctr := wdata(ctr.getWidth - 1, 0) } + when(decoded_addr(lo)) { ctr.assign(wdata(ctr.width - 1, 0)) } } } - def formEPC(x: UInt) = ~(~x | (if (usingCompressed) 1.U else 3.U)) - def readEPC(x: UInt) = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U)) - def formTVec(x: UInt) = 
x.andNot(Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U)) - def isaStringToMask(s: String) = s.map(x => 1 << (x - 'A')).foldLeft(0)(_ | _) - def formFS(fs: UInt) = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR) + def andNot(x: UInt, y: UInt): UInt = x & ~(y | (x & 0.U)) + def formEPC(x: UInt): UInt = ~(~x | (if (usingCompressed) 1.U else 3.U)) + def readEPC(x: UInt): UInt = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U)) + def formTVec(x: UInt): UInt = + andNot(x, Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U)) + def isaStringToMask(s: String): Int = s.map(x => 1 << (x - 'A')).foldLeft(0)(_ | _) + def formFS(fs: UInt): UInt = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR) } diff --git a/rocketv/src/DecodeLogic.scala b/rocketv/src/DecodeLogic.scala new file mode 100644 index 000000000..ad835f56d --- /dev/null +++ b/rocketv/src/DecodeLogic.scala @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.BitPat +import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable} + +// compatibility layer. 
+object DecodeLogic +{ + // TODO This should be a method on BitPat + private def hasDontCare(bp: BitPat): Boolean = bp.mask.bitCount != bp.width + // Pads BitPats that are safe to pad (no don't cares), errors otherwise + private def padBP(bp: BitPat, width: Int): BitPat = { + if (bp.width == width) bp + else { + require(!hasDontCare(bp), s"Cannot pad '$bp' to '$width' bits because it has don't cares") + val diff = width - bp.width + require(diff > 0, s"Cannot pad '$bp' to '$width' because it is already '${bp.width}' bits wide!") + BitPat(0.U(diff.W)) ## bp + } + } + + def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = + chisel3.util.experimental.decode.decoder(QMCMinimizer, addr, TruthTable(mapping, default)) + def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = { + val nElts = default.size + require(mappingIn.forall(_._2.size == nElts), + s"All Seq[BitPat] must be of the same length, got $nElts vs. ${mappingIn.find(_._2.size != nElts).get}" + ) + + val elementsGrouped = mappingIn.map(_._2).transpose + val elementWidths = elementsGrouped.zip(default).map { case (elts, default) => + (default :: elts.toList).map(_.getWidth).max + } + val resultWidth = elementWidths.sum + + val elementIndices = elementWidths.scan(resultWidth - 1) { case (l, r) => l - r } + + // All BitPats that correspond to a given element in the result must have the same width in the + // chisel3 decoder. We will zero pad any BitPats that are too small so long as they dont have + // any don't cares. 
If there are don't cares, it is an error and the user needs to pad the + // BitPat themselves + val defaultsPadded = default.zip(elementWidths).map { case (bp, w) => padBP(bp, w) } + val mappingInPadded = mappingIn.map { case (in, elts) => + in -> elts.zip(elementWidths).map { case (bp, w) => padBP(bp, w) } + } + val decoded = apply(addr, defaultsPadded.reduce(_ ## _), mappingInPadded.map { case (in, out) => (in, out.reduce(_ ## _)) }) + + elementIndices.zip(elementIndices.tail).map { case (msb, lsb) => decoded(msb, lsb + 1) }.toList + } + def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] = + apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]]) + def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = + apply(addr, BitPat.dontCare(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).asBool +} diff --git a/rocketv/src/RVDecoderDB.scala b/rocketv/src/RVDecoderDB.scala new file mode 100644 index 000000000..8b0d3387f --- /dev/null +++ b/rocketv/src/RVDecoderDB.scala @@ -0,0 +1,949 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +// The compatibility layer to bridge rvdecoderdb and codegen instructions. +// In the future, this file is going to be removed. 
+object rvdecoderdbcompat { + val rvdecoderdbPath = org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + + val causes = org.chipsalliance.rvdecoderdb.causes(rvdecoderdbPath) + object Causes { + val misaligned_fetch = causes("misaligned fetch") + val fetch_access = causes("fetch access") + val illegal_instruction = causes("illegal instruction") + val breakpoint = causes("breakpoint") + val misaligned_load = causes("misaligned load") + val load_access = causes("load access") + val misaligned_store = causes("misaligned store") + val store_access = causes("store access") + val user_ecall = causes("user ecall") + val supervisor_ecall = causes("supervisor ecall") + val virtual_supervisor_ecall = causes("virtual supervisor ecall") + val machine_ecall = causes("machine ecall") + val fetch_page_fault = causes("fetch page fault") + val load_page_fault = causes("load page fault") + val store_page_fault = causes("store page fault") + val fetch_guest_page_fault = causes("fetch guest page fault") + val load_guest_page_fault = causes("load guest page fault") + val virtual_instruction = causes("virtual instruction") + val store_guest_page_fault = causes("store guest page fault") + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += misaligned_fetch + res += fetch_access + res += illegal_instruction + res += breakpoint + res += misaligned_load + res += load_access + res += misaligned_store + res += store_access + res += user_ecall + res += supervisor_ecall + res += virtual_supervisor_ecall + res += machine_ecall + res += fetch_page_fault + res += load_page_fault + res += store_page_fault + res += fetch_guest_page_fault + res += load_guest_page_fault + res += virtual_instruction + res += store_guest_page_fault + res.toArray + } + } + + val csrs = org.chipsalliance.rvdecoderdb.csrs(rvdecoderdbPath).toMap + val csrs32 = org.chipsalliance.rvdecoderdb.csrs(rvdecoderdbPath).toMap + object CSRs { + val fflags = csrs("fflags") + val frm = csrs("frm") 
+ val fcsr = csrs("fcsr") + val vstart = csrs("vstart") + val vxsat = csrs("vxsat") + val vxrm = csrs("vxrm") + val vcsr = csrs("vcsr") + val seed = csrs("seed") + val jvt = csrs("jvt") + val cycle = csrs("cycle") + val time = csrs("time") + val instret = csrs("instret") + val hpmcounter3 = csrs("hpmcounter3") + val hpmcounter4 = csrs("hpmcounter4") + val hpmcounter5 = csrs("hpmcounter5") + val hpmcounter6 = csrs("hpmcounter6") + val hpmcounter7 = csrs("hpmcounter7") + val hpmcounter8 = csrs("hpmcounter8") + val hpmcounter9 = csrs("hpmcounter9") + val hpmcounter10 = csrs("hpmcounter10") + val hpmcounter11 = csrs("hpmcounter11") + val hpmcounter12 = csrs("hpmcounter12") + val hpmcounter13 = csrs("hpmcounter13") + val hpmcounter14 = csrs("hpmcounter14") + val hpmcounter15 = csrs("hpmcounter15") + val hpmcounter16 = csrs("hpmcounter16") + val hpmcounter17 = csrs("hpmcounter17") + val hpmcounter18 = csrs("hpmcounter18") + val hpmcounter19 = csrs("hpmcounter19") + val hpmcounter20 = csrs("hpmcounter20") + val hpmcounter21 = csrs("hpmcounter21") + val hpmcounter22 = csrs("hpmcounter22") + val hpmcounter23 = csrs("hpmcounter23") + val hpmcounter24 = csrs("hpmcounter24") + val hpmcounter25 = csrs("hpmcounter25") + val hpmcounter26 = csrs("hpmcounter26") + val hpmcounter27 = csrs("hpmcounter27") + val hpmcounter28 = csrs("hpmcounter28") + val hpmcounter29 = csrs("hpmcounter29") + val hpmcounter30 = csrs("hpmcounter30") + val hpmcounter31 = csrs("hpmcounter31") + val vl = csrs("vl") + val vtype = csrs("vtype") + val vlenb = csrs("vlenb") + val sstatus = csrs("sstatus") + val sedeleg = csrs("sedeleg") + val sideleg = csrs("sideleg") + val sie = csrs("sie") + val stvec = csrs("stvec") + val scounteren = csrs("scounteren") + val senvcfg = csrs("senvcfg") + val sstateen0 = csrs("sstateen0") + val sstateen1 = csrs("sstateen1") + val sstateen2 = csrs("sstateen2") + val sstateen3 = csrs("sstateen3") + val sscratch = csrs("sscratch") + val sepc = csrs("sepc") + val scause = 
csrs("scause") + val stval = csrs("stval") + val sip = csrs("sip") + val stimecmp = csrs("stimecmp") + val siselect = csrs("siselect") + val sireg = csrs("sireg") + val stopei = csrs("stopei") + val satp = csrs("satp") + val scontext = csrs("scontext") + val vsstatus = csrs("vsstatus") + val vsie = csrs("vsie") + val vstvec = csrs("vstvec") + val vsscratch = csrs("vsscratch") + val vsepc = csrs("vsepc") + val vscause = csrs("vscause") + val vstval = csrs("vstval") + val vsip = csrs("vsip") + val vstimecmp = csrs("vstimecmp") + val vsiselect = csrs("vsiselect") + val vsireg = csrs("vsireg") + val vstopei = csrs("vstopei") + val vsatp = csrs("vsatp") + val hstatus = csrs("hstatus") + val hedeleg = csrs("hedeleg") + val hideleg = csrs("hideleg") + val hie = csrs("hie") + val htimedelta = csrs("htimedelta") + val hcounteren = csrs("hcounteren") + val hgeie = csrs("hgeie") + val hvien = csrs("hvien") + val hvictl = csrs("hvictl") + val henvcfg = csrs("henvcfg") + val hstateen0 = csrs("hstateen0") + val hstateen1 = csrs("hstateen1") + val hstateen2 = csrs("hstateen2") + val hstateen3 = csrs("hstateen3") + val htval = csrs("htval") + val hip = csrs("hip") + val hvip = csrs("hvip") + val hviprio1 = csrs("hviprio1") + val hviprio2 = csrs("hviprio2") + val htinst = csrs("htinst") + val hgatp = csrs("hgatp") + val hcontext = csrs("hcontext") + val hgeip = csrs("hgeip") + val vstopi = csrs("vstopi") + val scountovf = csrs("scountovf") + val stopi = csrs("stopi") + val utvt = csrs("utvt") + val unxti = csrs("unxti") + val uintstatus = csrs("uintstatus") + val uscratchcsw = csrs("uscratchcsw") + val uscratchcswl = csrs("uscratchcswl") + val stvt = csrs("stvt") + val snxti = csrs("snxti") + val sintstatus = csrs("sintstatus") + val sscratchcsw = csrs("sscratchcsw") + val sscratchcswl = csrs("sscratchcswl") + val mtvt = csrs("mtvt") + val mnxti = csrs("mnxti") + val mintstatus = csrs("mintstatus") + val mscratchcsw = csrs("mscratchcsw") + val mscratchcswl = csrs("mscratchcswl") + 
val mstatus = csrs("mstatus") + val misa = csrs("misa") + val medeleg = csrs("medeleg") + val mideleg = csrs("mideleg") + val mie = csrs("mie") + val mtvec = csrs("mtvec") + val mcounteren = csrs("mcounteren") + val mvien = csrs("mvien") + val mvip = csrs("mvip") + val menvcfg = csrs("menvcfg") + val mstateen0 = csrs("mstateen0") + val mstateen1 = csrs("mstateen1") + val mstateen2 = csrs("mstateen2") + val mstateen3 = csrs("mstateen3") + val mcountinhibit = csrs("mcountinhibit") + val mscratch = csrs("mscratch") + val mepc = csrs("mepc") + val mcause = csrs("mcause") + val mtval = csrs("mtval") + val mip = csrs("mip") + val mtinst = csrs("mtinst") + val mtval2 = csrs("mtval2") + val miselect = csrs("miselect") + val mireg = csrs("mireg") + val mtopei = csrs("mtopei") + val pmpcfg0 = csrs("pmpcfg0") + val pmpcfg1 = csrs("pmpcfg1") + val pmpcfg2 = csrs("pmpcfg2") + val pmpcfg3 = csrs("pmpcfg3") + val pmpcfg4 = csrs("pmpcfg4") + val pmpcfg5 = csrs("pmpcfg5") + val pmpcfg6 = csrs("pmpcfg6") + val pmpcfg7 = csrs("pmpcfg7") + val pmpcfg8 = csrs("pmpcfg8") + val pmpcfg9 = csrs("pmpcfg9") + val pmpcfg10 = csrs("pmpcfg10") + val pmpcfg11 = csrs("pmpcfg11") + val pmpcfg12 = csrs("pmpcfg12") + val pmpcfg13 = csrs("pmpcfg13") + val pmpcfg14 = csrs("pmpcfg14") + val pmpcfg15 = csrs("pmpcfg15") + val pmpaddr0 = csrs("pmpaddr0") + val pmpaddr1 = csrs("pmpaddr1") + val pmpaddr2 = csrs("pmpaddr2") + val pmpaddr3 = csrs("pmpaddr3") + val pmpaddr4 = csrs("pmpaddr4") + val pmpaddr5 = csrs("pmpaddr5") + val pmpaddr6 = csrs("pmpaddr6") + val pmpaddr7 = csrs("pmpaddr7") + val pmpaddr8 = csrs("pmpaddr8") + val pmpaddr9 = csrs("pmpaddr9") + val pmpaddr10 = csrs("pmpaddr10") + val pmpaddr11 = csrs("pmpaddr11") + val pmpaddr12 = csrs("pmpaddr12") + val pmpaddr13 = csrs("pmpaddr13") + val pmpaddr14 = csrs("pmpaddr14") + val pmpaddr15 = csrs("pmpaddr15") + val pmpaddr16 = csrs("pmpaddr16") + val pmpaddr17 = csrs("pmpaddr17") + val pmpaddr18 = csrs("pmpaddr18") + val pmpaddr19 = 
csrs("pmpaddr19") + val pmpaddr20 = csrs("pmpaddr20") + val pmpaddr21 = csrs("pmpaddr21") + val pmpaddr22 = csrs("pmpaddr22") + val pmpaddr23 = csrs("pmpaddr23") + val pmpaddr24 = csrs("pmpaddr24") + val pmpaddr25 = csrs("pmpaddr25") + val pmpaddr26 = csrs("pmpaddr26") + val pmpaddr27 = csrs("pmpaddr27") + val pmpaddr28 = csrs("pmpaddr28") + val pmpaddr29 = csrs("pmpaddr29") + val pmpaddr30 = csrs("pmpaddr30") + val pmpaddr31 = csrs("pmpaddr31") + val pmpaddr32 = csrs("pmpaddr32") + val pmpaddr33 = csrs("pmpaddr33") + val pmpaddr34 = csrs("pmpaddr34") + val pmpaddr35 = csrs("pmpaddr35") + val pmpaddr36 = csrs("pmpaddr36") + val pmpaddr37 = csrs("pmpaddr37") + val pmpaddr38 = csrs("pmpaddr38") + val pmpaddr39 = csrs("pmpaddr39") + val pmpaddr40 = csrs("pmpaddr40") + val pmpaddr41 = csrs("pmpaddr41") + val pmpaddr42 = csrs("pmpaddr42") + val pmpaddr43 = csrs("pmpaddr43") + val pmpaddr44 = csrs("pmpaddr44") + val pmpaddr45 = csrs("pmpaddr45") + val pmpaddr46 = csrs("pmpaddr46") + val pmpaddr47 = csrs("pmpaddr47") + val pmpaddr48 = csrs("pmpaddr48") + val pmpaddr49 = csrs("pmpaddr49") + val pmpaddr50 = csrs("pmpaddr50") + val pmpaddr51 = csrs("pmpaddr51") + val pmpaddr52 = csrs("pmpaddr52") + val pmpaddr53 = csrs("pmpaddr53") + val pmpaddr54 = csrs("pmpaddr54") + val pmpaddr55 = csrs("pmpaddr55") + val pmpaddr56 = csrs("pmpaddr56") + val pmpaddr57 = csrs("pmpaddr57") + val pmpaddr58 = csrs("pmpaddr58") + val pmpaddr59 = csrs("pmpaddr59") + val pmpaddr60 = csrs("pmpaddr60") + val pmpaddr61 = csrs("pmpaddr61") + val pmpaddr62 = csrs("pmpaddr62") + val pmpaddr63 = csrs("pmpaddr63") + val mseccfg = csrs("mseccfg") + val tselect = csrs("tselect") + val tdata1 = csrs("tdata1") + val tdata2 = csrs("tdata2") + val tdata3 = csrs("tdata3") + val tinfo = csrs("tinfo") + val tcontrol = csrs("tcontrol") + val mcontext = csrs("mcontext") + val mscontext = csrs("mscontext") + val dcsr = csrs("dcsr") + val dpc = csrs("dpc") + val dscratch0 = csrs("dscratch0") + val dscratch1 = 
csrs("dscratch1") + val mcycle = csrs("mcycle") + val minstret = csrs("minstret") + val mhpmcounter3 = csrs("mhpmcounter3") + val mhpmcounter4 = csrs("mhpmcounter4") + val mhpmcounter5 = csrs("mhpmcounter5") + val mhpmcounter6 = csrs("mhpmcounter6") + val mhpmcounter7 = csrs("mhpmcounter7") + val mhpmcounter8 = csrs("mhpmcounter8") + val mhpmcounter9 = csrs("mhpmcounter9") + val mhpmcounter10 = csrs("mhpmcounter10") + val mhpmcounter11 = csrs("mhpmcounter11") + val mhpmcounter12 = csrs("mhpmcounter12") + val mhpmcounter13 = csrs("mhpmcounter13") + val mhpmcounter14 = csrs("mhpmcounter14") + val mhpmcounter15 = csrs("mhpmcounter15") + val mhpmcounter16 = csrs("mhpmcounter16") + val mhpmcounter17 = csrs("mhpmcounter17") + val mhpmcounter18 = csrs("mhpmcounter18") + val mhpmcounter19 = csrs("mhpmcounter19") + val mhpmcounter20 = csrs("mhpmcounter20") + val mhpmcounter21 = csrs("mhpmcounter21") + val mhpmcounter22 = csrs("mhpmcounter22") + val mhpmcounter23 = csrs("mhpmcounter23") + val mhpmcounter24 = csrs("mhpmcounter24") + val mhpmcounter25 = csrs("mhpmcounter25") + val mhpmcounter26 = csrs("mhpmcounter26") + val mhpmcounter27 = csrs("mhpmcounter27") + val mhpmcounter28 = csrs("mhpmcounter28") + val mhpmcounter29 = csrs("mhpmcounter29") + val mhpmcounter30 = csrs("mhpmcounter30") + val mhpmcounter31 = csrs("mhpmcounter31") + val mhpmevent3 = csrs("mhpmevent3") + val mhpmevent4 = csrs("mhpmevent4") + val mhpmevent5 = csrs("mhpmevent5") + val mhpmevent6 = csrs("mhpmevent6") + val mhpmevent7 = csrs("mhpmevent7") + val mhpmevent8 = csrs("mhpmevent8") + val mhpmevent9 = csrs("mhpmevent9") + val mhpmevent10 = csrs("mhpmevent10") + val mhpmevent11 = csrs("mhpmevent11") + val mhpmevent12 = csrs("mhpmevent12") + val mhpmevent13 = csrs("mhpmevent13") + val mhpmevent14 = csrs("mhpmevent14") + val mhpmevent15 = csrs("mhpmevent15") + val mhpmevent16 = csrs("mhpmevent16") + val mhpmevent17 = csrs("mhpmevent17") + val mhpmevent18 = csrs("mhpmevent18") + val mhpmevent19 = 
csrs("mhpmevent19") + val mhpmevent20 = csrs("mhpmevent20") + val mhpmevent21 = csrs("mhpmevent21") + val mhpmevent22 = csrs("mhpmevent22") + val mhpmevent23 = csrs("mhpmevent23") + val mhpmevent24 = csrs("mhpmevent24") + val mhpmevent25 = csrs("mhpmevent25") + val mhpmevent26 = csrs("mhpmevent26") + val mhpmevent27 = csrs("mhpmevent27") + val mhpmevent28 = csrs("mhpmevent28") + val mhpmevent29 = csrs("mhpmevent29") + val mhpmevent30 = csrs("mhpmevent30") + val mhpmevent31 = csrs("mhpmevent31") + val mvendorid = csrs("mvendorid") + val marchid = csrs("marchid") + val mimpid = csrs("mimpid") + val mhartid = csrs("mhartid") + val mconfigptr = csrs("mconfigptr") + val mtopi = csrs("mtopi") + + val sieh = csrs32("sieh") + val siph = csrs32("siph") + val stimecmph = csrs32("stimecmph") + val vsieh = csrs32("vsieh") + val vsiph = csrs32("vsiph") + val vstimecmph = csrs32("vstimecmph") + val htimedeltah = csrs32("htimedeltah") + val hidelegh = csrs32("hidelegh") + val hvienh = csrs32("hvienh") + val henvcfgh = csrs32("henvcfgh") + val hviph = csrs32("hviph") + val hviprio1h = csrs32("hviprio1h") + val hviprio2h = csrs32("hviprio2h") + val hstateen0h = csrs32("hstateen0h") + val hstateen1h = csrs32("hstateen1h") + val hstateen2h = csrs32("hstateen2h") + val hstateen3h = csrs32("hstateen3h") + val cycleh = csrs32("cycleh") + val timeh = csrs32("timeh") + val instreth = csrs32("instreth") + val hpmcounter3h = csrs32("hpmcounter3h") + val hpmcounter4h = csrs32("hpmcounter4h") + val hpmcounter5h = csrs32("hpmcounter5h") + val hpmcounter6h = csrs32("hpmcounter6h") + val hpmcounter7h = csrs32("hpmcounter7h") + val hpmcounter8h = csrs32("hpmcounter8h") + val hpmcounter9h = csrs32("hpmcounter9h") + val hpmcounter10h = csrs32("hpmcounter10h") + val hpmcounter11h = csrs32("hpmcounter11h") + val hpmcounter12h = csrs32("hpmcounter12h") + val hpmcounter13h = csrs32("hpmcounter13h") + val hpmcounter14h = csrs32("hpmcounter14h") + val hpmcounter15h = csrs32("hpmcounter15h") + val 
hpmcounter16h = csrs32("hpmcounter16h") + val hpmcounter17h = csrs32("hpmcounter17h") + val hpmcounter18h = csrs32("hpmcounter18h") + val hpmcounter19h = csrs32("hpmcounter19h") + val hpmcounter20h = csrs32("hpmcounter20h") + val hpmcounter21h = csrs32("hpmcounter21h") + val hpmcounter22h = csrs32("hpmcounter22h") + val hpmcounter23h = csrs32("hpmcounter23h") + val hpmcounter24h = csrs32("hpmcounter24h") + val hpmcounter25h = csrs32("hpmcounter25h") + val hpmcounter26h = csrs32("hpmcounter26h") + val hpmcounter27h = csrs32("hpmcounter27h") + val hpmcounter28h = csrs32("hpmcounter28h") + val hpmcounter29h = csrs32("hpmcounter29h") + val hpmcounter30h = csrs32("hpmcounter30h") + val hpmcounter31h = csrs32("hpmcounter31h") + val mstatush = csrs32("mstatush") + val midelegh = csrs32("midelegh") + val mieh = csrs32("mieh") + val mvienh = csrs32("mvienh") + val mviph = csrs32("mviph") + val menvcfgh = csrs32("menvcfgh") + val mstateen0h = csrs32("mstateen0h") + val mstateen1h = csrs32("mstateen1h") + val mstateen2h = csrs32("mstateen2h") + val mstateen3h = csrs32("mstateen3h") + val miph = csrs32("miph") + val mhpmevent3h = csrs32("mhpmevent3h") + val mhpmevent4h = csrs32("mhpmevent4h") + val mhpmevent5h = csrs32("mhpmevent5h") + val mhpmevent6h = csrs32("mhpmevent6h") + val mhpmevent7h = csrs32("mhpmevent7h") + val mhpmevent8h = csrs32("mhpmevent8h") + val mhpmevent9h = csrs32("mhpmevent9h") + val mhpmevent10h = csrs32("mhpmevent10h") + val mhpmevent11h = csrs32("mhpmevent11h") + val mhpmevent12h = csrs32("mhpmevent12h") + val mhpmevent13h = csrs32("mhpmevent13h") + val mhpmevent14h = csrs32("mhpmevent14h") + val mhpmevent15h = csrs32("mhpmevent15h") + val mhpmevent16h = csrs32("mhpmevent16h") + val mhpmevent17h = csrs32("mhpmevent17h") + val mhpmevent18h = csrs32("mhpmevent18h") + val mhpmevent19h = csrs32("mhpmevent19h") + val mhpmevent20h = csrs32("mhpmevent20h") + val mhpmevent21h = csrs32("mhpmevent21h") + val mhpmevent22h = csrs32("mhpmevent22h") + val 
mhpmevent23h = csrs32("mhpmevent23h") + val mhpmevent24h = csrs32("mhpmevent24h") + val mhpmevent25h = csrs32("mhpmevent25h") + val mhpmevent26h = csrs32("mhpmevent26h") + val mhpmevent27h = csrs32("mhpmevent27h") + val mhpmevent28h = csrs32("mhpmevent28h") + val mhpmevent29h = csrs32("mhpmevent29h") + val mhpmevent30h = csrs32("mhpmevent30h") + val mhpmevent31h = csrs32("mhpmevent31h") + val mnscratch = csrs32("mnscratch") + val mnepc = csrs32("mnepc") + val mncause = csrs32("mncause") + val mnstatus = csrs32("mnstatus") + val mseccfgh = csrs32("mseccfgh") + val mcycleh = csrs32("mcycleh") + val minstreth = csrs32("minstreth") + val mhpmcounter3h = csrs32("mhpmcounter3h") + val mhpmcounter4h = csrs32("mhpmcounter4h") + val mhpmcounter5h = csrs32("mhpmcounter5h") + val mhpmcounter6h = csrs32("mhpmcounter6h") + val mhpmcounter7h = csrs32("mhpmcounter7h") + val mhpmcounter8h = csrs32("mhpmcounter8h") + val mhpmcounter9h = csrs32("mhpmcounter9h") + val mhpmcounter10h = csrs32("mhpmcounter10h") + val mhpmcounter11h = csrs32("mhpmcounter11h") + val mhpmcounter12h = csrs32("mhpmcounter12h") + val mhpmcounter13h = csrs32("mhpmcounter13h") + val mhpmcounter14h = csrs32("mhpmcounter14h") + val mhpmcounter15h = csrs32("mhpmcounter15h") + val mhpmcounter16h = csrs32("mhpmcounter16h") + val mhpmcounter17h = csrs32("mhpmcounter17h") + val mhpmcounter18h = csrs32("mhpmcounter18h") + val mhpmcounter19h = csrs32("mhpmcounter19h") + val mhpmcounter20h = csrs32("mhpmcounter20h") + val mhpmcounter21h = csrs32("mhpmcounter21h") + val mhpmcounter22h = csrs32("mhpmcounter22h") + val mhpmcounter23h = csrs32("mhpmcounter23h") + val mhpmcounter24h = csrs32("mhpmcounter24h") + val mhpmcounter25h = csrs32("mhpmcounter25h") + val mhpmcounter26h = csrs32("mhpmcounter26h") + val mhpmcounter27h = csrs32("mhpmcounter27h") + val mhpmcounter28h = csrs32("mhpmcounter28h") + val mhpmcounter29h = csrs32("mhpmcounter29h") + val mhpmcounter30h = csrs32("mhpmcounter30h") + val mhpmcounter31h = 
csrs32("mhpmcounter31h") + + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += fflags + res += frm + res += fcsr + res += vstart + res += vxsat + res += vxrm + res += vcsr + res += seed + res += jvt + res += cycle + res += time + res += instret + res += hpmcounter3 + res += hpmcounter4 + res += hpmcounter5 + res += hpmcounter6 + res += hpmcounter7 + res += hpmcounter8 + res += hpmcounter9 + res += hpmcounter10 + res += hpmcounter11 + res += hpmcounter12 + res += hpmcounter13 + res += hpmcounter14 + res += hpmcounter15 + res += hpmcounter16 + res += hpmcounter17 + res += hpmcounter18 + res += hpmcounter19 + res += hpmcounter20 + res += hpmcounter21 + res += hpmcounter22 + res += hpmcounter23 + res += hpmcounter24 + res += hpmcounter25 + res += hpmcounter26 + res += hpmcounter27 + res += hpmcounter28 + res += hpmcounter29 + res += hpmcounter30 + res += hpmcounter31 + res += vl + res += vtype + res += vlenb + res += sstatus + res += sedeleg + res += sideleg + res += sie + res += stvec + res += scounteren + res += senvcfg + res += sstateen0 + res += sstateen1 + res += sstateen2 + res += sstateen3 + res += sscratch + res += sepc + res += scause + res += stval + res += sip + res += stimecmp + res += siselect + res += sireg + res += stopei + res += satp + res += scontext + res += vsstatus + res += vsie + res += vstvec + res += vsscratch + res += vsepc + res += vscause + res += vstval + res += vsip + res += vstimecmp + res += vsiselect + res += vsireg + res += vstopei + res += vsatp + res += hstatus + res += hedeleg + res += hideleg + res += hie + res += htimedelta + res += hcounteren + res += hgeie + res += hvien + res += hvictl + res += henvcfg + res += hstateen0 + res += hstateen1 + res += hstateen2 + res += hstateen3 + res += htval + res += hip + res += hvip + res += hviprio1 + res += hviprio2 + res += htinst + res += hgatp + res += hcontext + res += hgeip + res += vstopi + res += scountovf + res += stopi + res += utvt + res += unxti + res += 
uintstatus + res += uscratchcsw + res += uscratchcswl + res += stvt + res += snxti + res += sintstatus + res += sscratchcsw + res += sscratchcswl + res += mtvt + res += mnxti + res += mintstatus + res += mscratchcsw + res += mscratchcswl + res += mstatus + res += misa + res += medeleg + res += mideleg + res += mie + res += mtvec + res += mcounteren + res += mvien + res += mvip + res += menvcfg + res += mstateen0 + res += mstateen1 + res += mstateen2 + res += mstateen3 + res += mcountinhibit + res += mscratch + res += mepc + res += mcause + res += mtval + res += mip + res += mtinst + res += mtval2 + res += miselect + res += mireg + res += mtopei + res += pmpcfg0 + res += pmpcfg1 + res += pmpcfg2 + res += pmpcfg3 + res += pmpcfg4 + res += pmpcfg5 + res += pmpcfg6 + res += pmpcfg7 + res += pmpcfg8 + res += pmpcfg9 + res += pmpcfg10 + res += pmpcfg11 + res += pmpcfg12 + res += pmpcfg13 + res += pmpcfg14 + res += pmpcfg15 + res += pmpaddr0 + res += pmpaddr1 + res += pmpaddr2 + res += pmpaddr3 + res += pmpaddr4 + res += pmpaddr5 + res += pmpaddr6 + res += pmpaddr7 + res += pmpaddr8 + res += pmpaddr9 + res += pmpaddr10 + res += pmpaddr11 + res += pmpaddr12 + res += pmpaddr13 + res += pmpaddr14 + res += pmpaddr15 + res += pmpaddr16 + res += pmpaddr17 + res += pmpaddr18 + res += pmpaddr19 + res += pmpaddr20 + res += pmpaddr21 + res += pmpaddr22 + res += pmpaddr23 + res += pmpaddr24 + res += pmpaddr25 + res += pmpaddr26 + res += pmpaddr27 + res += pmpaddr28 + res += pmpaddr29 + res += pmpaddr30 + res += pmpaddr31 + res += pmpaddr32 + res += pmpaddr33 + res += pmpaddr34 + res += pmpaddr35 + res += pmpaddr36 + res += pmpaddr37 + res += pmpaddr38 + res += pmpaddr39 + res += pmpaddr40 + res += pmpaddr41 + res += pmpaddr42 + res += pmpaddr43 + res += pmpaddr44 + res += pmpaddr45 + res += pmpaddr46 + res += pmpaddr47 + res += pmpaddr48 + res += pmpaddr49 + res += pmpaddr50 + res += pmpaddr51 + res += pmpaddr52 + res += pmpaddr53 + res += pmpaddr54 + res += pmpaddr55 + res += 
pmpaddr56 + res += pmpaddr57 + res += pmpaddr58 + res += pmpaddr59 + res += pmpaddr60 + res += pmpaddr61 + res += pmpaddr62 + res += pmpaddr63 + res += mseccfg + res += tselect + res += tdata1 + res += tdata2 + res += tdata3 + res += tinfo + res += tcontrol + res += mcontext + res += mscontext + res += dcsr + res += dpc + res += dscratch0 + res += dscratch1 + res += mcycle + res += minstret + res += mhpmcounter3 + res += mhpmcounter4 + res += mhpmcounter5 + res += mhpmcounter6 + res += mhpmcounter7 + res += mhpmcounter8 + res += mhpmcounter9 + res += mhpmcounter10 + res += mhpmcounter11 + res += mhpmcounter12 + res += mhpmcounter13 + res += mhpmcounter14 + res += mhpmcounter15 + res += mhpmcounter16 + res += mhpmcounter17 + res += mhpmcounter18 + res += mhpmcounter19 + res += mhpmcounter20 + res += mhpmcounter21 + res += mhpmcounter22 + res += mhpmcounter23 + res += mhpmcounter24 + res += mhpmcounter25 + res += mhpmcounter26 + res += mhpmcounter27 + res += mhpmcounter28 + res += mhpmcounter29 + res += mhpmcounter30 + res += mhpmcounter31 + res += mhpmevent3 + res += mhpmevent4 + res += mhpmevent5 + res += mhpmevent6 + res += mhpmevent7 + res += mhpmevent8 + res += mhpmevent9 + res += mhpmevent10 + res += mhpmevent11 + res += mhpmevent12 + res += mhpmevent13 + res += mhpmevent14 + res += mhpmevent15 + res += mhpmevent16 + res += mhpmevent17 + res += mhpmevent18 + res += mhpmevent19 + res += mhpmevent20 + res += mhpmevent21 + res += mhpmevent22 + res += mhpmevent23 + res += mhpmevent24 + res += mhpmevent25 + res += mhpmevent26 + res += mhpmevent27 + res += mhpmevent28 + res += mhpmevent29 + res += mhpmevent30 + res += mhpmevent31 + res += mvendorid + res += marchid + res += mimpid + res += mhartid + res += mconfigptr + res += mtopi + res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all: _*) + res += sieh + res += siph + res += stimecmph + res += vsieh + res += vsiph + res += vstimecmph + res += htimedeltah + res += hidelegh + res += hvienh + 
res += henvcfgh + res += hviph + res += hviprio1h + res += hviprio2h + res += hstateen0h + res += hstateen1h + res += hstateen2h + res += hstateen3h + res += cycleh + res += timeh + res += instreth + res += hpmcounter3h + res += hpmcounter4h + res += hpmcounter5h + res += hpmcounter6h + res += hpmcounter7h + res += hpmcounter8h + res += hpmcounter9h + res += hpmcounter10h + res += hpmcounter11h + res += hpmcounter12h + res += hpmcounter13h + res += hpmcounter14h + res += hpmcounter15h + res += hpmcounter16h + res += hpmcounter17h + res += hpmcounter18h + res += hpmcounter19h + res += hpmcounter20h + res += hpmcounter21h + res += hpmcounter22h + res += hpmcounter23h + res += hpmcounter24h + res += hpmcounter25h + res += hpmcounter26h + res += hpmcounter27h + res += hpmcounter28h + res += hpmcounter29h + res += hpmcounter30h + res += hpmcounter31h + res += mstatush + res += midelegh + res += mieh + res += mvienh + res += mviph + res += menvcfgh + res += mstateen0h + res += mstateen1h + res += mstateen2h + res += mstateen3h + res += miph + res += mhpmevent3h + res += mhpmevent4h + res += mhpmevent5h + res += mhpmevent6h + res += mhpmevent7h + res += mhpmevent8h + res += mhpmevent9h + res += mhpmevent10h + res += mhpmevent11h + res += mhpmevent12h + res += mhpmevent13h + res += mhpmevent14h + res += mhpmevent15h + res += mhpmevent16h + res += mhpmevent17h + res += mhpmevent18h + res += mhpmevent19h + res += mhpmevent20h + res += mhpmevent21h + res += mhpmevent22h + res += mhpmevent23h + res += mhpmevent24h + res += mhpmevent25h + res += mhpmevent26h + res += mhpmevent27h + res += mhpmevent28h + res += mhpmevent29h + res += mhpmevent30h + res += mhpmevent31h + res += mnscratch + res += mnepc + res += mncause + res += mnstatus + res += mseccfgh + res += mcycleh + res += minstreth + res += mhpmcounter3h + res += mhpmcounter4h + res += mhpmcounter5h + res += mhpmcounter6h + res += mhpmcounter7h + res += mhpmcounter8h + res += mhpmcounter9h + res += mhpmcounter10h + res += 
mhpmcounter11h + res += mhpmcounter12h + res += mhpmcounter13h + res += mhpmcounter14h + res += mhpmcounter15h + res += mhpmcounter16h + res += mhpmcounter17h + res += mhpmcounter18h + res += mhpmcounter19h + res += mhpmcounter20h + res += mhpmcounter21h + res += mhpmcounter22h + res += mhpmcounter23h + res += mhpmcounter24h + res += mhpmcounter25h + res += mhpmcounter26h + res += mhpmcounter27h + res += mhpmcounter28h + res += mhpmcounter29h + res += mhpmcounter30h + res += mhpmcounter31h + res.toArray + } + } + + object CustomCSRs { + val mnscratch = 0x350 + val mnepc = 0x351 + val mncause = 0x352 + val mnstatus = 0x353 + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += mnscratch + res += mnepc + res += mncause + res += mnstatus + res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) + res.toArray + } + } +} diff --git a/rocketv/src/csr/V.scala b/rocketv/src/csr/V.scala new file mode 100644 index 000000000..944448f13 --- /dev/null +++ b/rocketv/src/csr/V.scala @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu + +package org.chipsalliance.rocketv.csr + +import chisel3._ +import chisel3.util.log2Ceil + +// context for Vector +class V(vlen: Int, hypervisor: Boolean) { + require(Module.currentModule.isDefined) + def vlWidth: Int = log2Ceil(vlen) + 1 + def vlenbWidth = log2Ceil(vlen / 8) + val contents: Seq[String] = Seq( + "misa.V", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#32-vector-context-status-in-mstatus + "mstatus.VS", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#33-vector-context-status-in-vsstatus + "vsstatus.VS", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#341-vector-selected-element-width-vsew20 + "vsew", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#342-vector-register-grouping-vlmul20 + "vlmul", + // 
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#343-vector-tail-agnostic-and-vector-mask-agnostic-vta-and-vma + "vta", + "vma", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#344-vector-type-illegal-vill + "vill", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#35-vector-length-register-vl + "vl", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#36-vector-byte-length-vlenb + "vlenb", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#37-vector-start-index-csr-vstart + "vstart", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#38-vector-fixed-point-rounding-mode-register-vxrm + "vxrm", + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#39-vector-fixed-point-saturation-flag-vxsat + "vxsat" + ) + def chiselType(content: String): Data = content match { + case "misa.V" => Bool() + case "mstatus.VS" => UInt(2.W) + case "vsstatus.VS" => UInt(2.W) + case "vlmul" => UInt(3.W) + case "vsew" => UInt(3.W) + case "vta" => Bool() + case "vma" => Bool() + case "vill" => Bool() + case "vl" => UInt(vlWidth.W) + case "vlenb" => UInt(vlenbWidth.W) + case "vstart" => UInt(vlWidth.W) + case "vxrm" => UInt(2.W) + case "vxsat" => UInt(2.W) + } + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#311-state-of-vector-extension-at-reset + def reset(content: String): Option[UInt] = content match { + // 1 -> Initial; 2 -> Clean; 3 -> Dirty + case "mstatus.VS" => Some(0.U) + // It is recommended that at reset, vtype.vill is set, the remaining bits in vtype are zero, and vl is set to zero. + case "vlmul" => Some(0.U) + case "vsew" => Some(0.U) + case "vta" => Some(false.B) + case "vma" => Some(false.B) + case "vill" => Some(true.B) + // The vector extension must have a consistent state at reset. In particular, vtype and vl must have values that can be read and then restored with a single vsetvl instruction. 
+ case "vl" => Some(0.U) + // The vstart, vxrm, vxsat CSRs can have arbitrary values at reset. + case _ => None + } + def constant(content: String): Option[UInt] = content match { + // MISA in Rocket is not writable. + case "misa.V" => Some(true.B) + case "vlenb" => Some((vlen / 8).U) + case _ => None + } + + val states: Map[String, UInt] = + (Seq( + "mstatus.VS", + "vsew", + "vlmul", + "vta", + "vma", + "vill", + "vl", + "vstart", + "vxrm", + "vxsat" + ) ++ Option.when(hypervisor)( + // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#33-vector-context-status-in-vsstatus + "vsstatus.VS" + )).map { content: String => + content -> + reset(content) + .map(resetValue => RegInit(resetValue)) + .getOrElse(Reg(chiselType(content))) + .suggestName(content) + .asUInt + }.toMap + + val constants: Map[String, UInt] = Seq( + // MISA in Rocket is not writable + "misa.V", + "vlenb" + ).map { content: String => + content -> constant(content).get + }.toMap +} From 0e5e740b0de6ef9a7f7d85b97dc6139feafd9a5d Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 28 Jun 2024 16:43:25 +0800 Subject: [PATCH 039/140] [rocketv] add elaborator for CSR - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.CSR config --vLen 512 --useAsyncReset false --xLen 32 --fLen 32 --usingSupervisor false --usingFPU true --usingUser false --usingVM false --pgLevels 2 --hartIdLen 1 --usingCompressed true --usingAtomics true --usingDebug true --usingMulDiv true --usingVector true - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.CSR design --parameter ./CSR.json --run-firtool --- elaborator/src/rocketv/CSR.scala | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 elaborator/src/rocketv/CSR.scala diff --git a/elaborator/src/rocketv/CSR.scala b/elaborator/src/rocketv/CSR.scala new file mode 100644 index 000000000..17725a517 --- /dev/null +++ b/elaborator/src/rocketv/CSR.scala @@ -0,0 +1,70 
@@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{CSR, CSRParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object CSR extends Elaborator { + @main + case class CSRParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "vLen") vLen: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "vmidBits") vmidBits: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "nPerfCounters") nPerfCounters: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "usingSupervisor") usingSupervisor: Boolean, + @arg(name = "usingFPU") usingFPU: Boolean, + @arg(name = "usingUser") usingUser: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDebug") usingDebug: Boolean, + @arg(name = "usingMulDiv") usingMulDiv: Boolean, + @arg(name = "usingVector") usingVector: Boolean) { + def convert: CSRParameter = CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen: Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean + ) + } + + implicit def CSRParameterMainParser: ParserForClass[CSRParameterMain] = ParserForClass[CSRParameterMain] + + @main + def config(@arg(name 
= "parameter") parameter: CSRParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[CSR, CSRParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 31e6faa4945adb50f126ab499910bc18b75f7359 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 14:58:43 +0800 Subject: [PATCH 040/140] [rocketv] migrate Decoder --- rocketv/src/Decoder.scala | 749 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 749 insertions(+) create mode 100644 rocketv/src/Decoder.scala diff --git a/rocketv/src/Decoder.scala b/rocketv/src/Decoder.scala new file mode 100644 index 000000000..419e407b0 --- /dev/null +++ b/rocketv/src/Decoder.scala @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.BitPat +import chisel3.util.experimental.decode.{BoolDecodeField, DecodeField, DecodePattern, DecodeTable} +import org.chipsalliance.rvdecoderdb.{Encoding, Instruction, InstructionSet} + +// behave like ChiselEnum, but for compatibility, use UInt for now. +// This is going to be upstreamed to Chisel in the future. 
+trait UOP { + def width: Int + + def dontCare: BitPat = BitPat.dontCare(width) + + def chiselType: TPE = UInt(width.W) + + def encode(lit: Int): BitPat = BitPat(lit.U(width.W)) + + def encode(strLit: String): BitPat = BitPat(strLit.U(width.W)) + + type TPE = UInt +} + +trait UOPDecodeField[T <: DecodePattern] extends DecodeField[T, UInt] { + def uopType: UOP + + def chiselType: UInt = uopType.chiselType +} + +object CustomInstructions { + private def rocket(name: String, encoding: Encoding) = + Instruction(name, encoding, Seq(), Seq(InstructionSet("rv_rocket")), None, false, true) + + val rocketSet = Seq( + // should be replaced by: + // cbo.clean rs1 31..20=1 14..12=2 11..7=0 6..2=0x03 1..0=3 + // cbo.flush rs1 31..20=2 14..12=2 11..7=0 6..2=0x03 1..0=3 + // cbo.inval rs1 31..20=0 14..12=2 11..7=0 6..2=0x03 1..0=3 + rocket("c.flush.d.l1", Encoding.fromString("111111000000?????000000001110011")), + rocket("c.discard.d.l1", Encoding.fromString("111111000010?????000000001110011")), + // no standard instruction, maybe we need to change this to mmio store to PMU + rocket("cease", Encoding.fromString("00110000010100000000000001110011")) + ) +} + +object DecoderParameter { + implicit def rwP: upickle.default.ReadWriter[DecoderParameter] = upickle.default.macroRW[DecoderParameter] +} + +case class DecoderParameter( + instructionSets: Set[String], + pipelinedMul: Boolean, + fenceIFlushDCache: Boolean) + extends SerializableModuleParameter { + val instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets. 
+ Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + + // functions below is my little reminder, which is used for future rocket core refactoring, just keep it, I'll remove it later in the future. + private def hasAnySetIn(sets: String*): Boolean = + sets.exists(set => instructions.flatMap(_.instructionSets.map(_.name)).exists(_.contains(set))) + + private def xLen32: Boolean = instructions.map(_.instructionSet.name).exists(_.startsWith("rv32_")) + + private def xLen64: Boolean = instructions.map(_.instructionSet.name).exists(_.startsWith("rv64_")) + + private def fLen0: Boolean = !fLen32 && !fLen64 + + private def fLen32: Boolean = hasAnySetIn("rv_f", "rv32_f", "rv64_f") + + private def fLen64: Boolean = hasAnySetIn("rv_d", "rv32_d", "rv64_d") + + private val useFPU = !fLen0 + private val useMulDiv = hasAnySetIn("rv_m", "rv64_m") + private val useVector = hasAnySetIn("rv_v") + + private val instructionDecodePatterns: Seq[RocketDecodePattern] = instructions.map(RocketDecodePattern.apply) + private val instructionDecodeFields: Seq[DecodeField[RocketDecodePattern, _ <: Data]] = Seq( + isLegal, + isBranch, + isJal, + isJalr, + rxs2, + rxs1, + selAlu2, + selAlu1, + selImm, + aluDoubleWords, + mem, + memCommand, + wxd, + csr, + fenceI, + fence, + amo, + aluFn + ) ++ + (if (useFPU) Seq(fp, rfs1, rfs2, rfs3, wfd, dp) else None) ++ + (if (useMulDiv) if (pipelinedMul) Seq(mul, div) else Seq(div) else None) ++ + (if (useVector) Seq(vector, vectorLSU, vectorCSR) else None) + private val Y = BitPat.Y() + private val N = BitPat.N() + + val table: DecodeTable[RocketDecodePattern] = new DecodeTable[RocketDecodePattern]( + instructionDecodePatterns, + 
instructionDecodeFields + ) + + object isLegal extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "legal" + + override def default: BitPat = n + + // should always be true + override def genTable(op: RocketDecodePattern): BitPat = y + } + + object fp extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fp" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq( + "rv_d", "rv64_d", + "rv_f", "rv64_f", + "rv_q", "rv64_q", + "rv_zfh", "rv64_zfh", "rv_d_zfh", "rv_q_zfh", + ).contains(s) => y + case _ => n + // format: on + } + } + + object dp extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "dp" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq("rv_d", "rv_d_zfh", "rv64_d").contains(s) => y + case _ => n + // format: on + } + } + + object isBranch extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "branch" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("bne", "beq", "blt", "bltu", "bge", "bgeu").contains(i) => y + case _ => n + // format: on + } + } + + object isJal extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "jal" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("jal").contains(i) => y + case _ => n + // format: on + } + } + + object isJalr extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "jalr" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("jalr").contains(i) => y + case _ => n + // format: on + } + } + + object rxs2 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rxs2" + + override def 
genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "hsv.w", "hsv.b", "hfence.vvma", "hsv.h", "hfence.gvma", "hsv.d", "or", "srl", "sltu", "sra", "sb", "add", "xor", "beq", "bge", "sw", "blt", "bgeu", "bltu", "bne", "sub", "and", "slt", "sh", "sll", "addw", "sd", "sllw", "sraw", "subw", "srlw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "czero.nez", "czero.eqz").contains(i) => y + case (_, p) if p.vectorReadRs2 => y + case _ => n + // format: on + } + } + + object rxs1 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rxs1" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fcvt.d.wu", "fsd", "fcvt.d.w", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.s.wu", "fmv.w.x", "fsw", "fcvt.s.w", "flw", "fcvt.s.lu", "fcvt.s.l", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "bne", "lbu", "sub", "and", "xori", "slti", "slt", "addi", "lb", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", 
"div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "fsh", "flh", "fcvt.h.wu", "fcvt.h.w", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l", "csrrc", "csrrs", "csrrw", "czero.nez", "czero.eqz", "cflush.d.l1", "cdiscard.d.l1").contains(i) => y + case (i, _) if Seq("ecall", "ebreak", "mret", "wfi", "sret", "dret", "cease", "nmret").contains(i) => dc + case (_, p) if p.vectorReadRs1 => y + case _ => n + // format: on + } + } + + object fenceI extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fence_i" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fence.i").contains(i) => y + case _ => n + // format: on + } + } + + object fence extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "fence" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fence").contains(i) => y + case _ => n + // format: on + } + } + + object amo extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "amo" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.instructionSet.name match { + // format: off + case s if Seq("rv_a", "rv64_a").contains(s) => y + case _ => n + // format: on + } + } + + object aluDoubleWords extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "alu_dw" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", 
"hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "lui", "bne", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sh", "sll", "srli", "srai", "slli", "ld", "sd", "lwu", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "sfence.vma", "fsh", "flh", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "czero.nez", "czero.eqz").contains(i) => y + case i if Seq("addw", "sraiw", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "remuw", "divw", "divuw", "mulw", "remw").contains(i) => n + case _ => dc + // format: on + } + } + } + + object mem extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "mem" + + override def default: BitPat = n + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hlv.hu", "hlv.b", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hsv.d", "hlv.d", "hlv.wu", "lhu", "sb", "lw", "sw", "lh", "lbu", "lb", "sh", "ld", "sd", "lwu", "sfence.vma", "fsh", "flh").contains(i) => y + case i if Seq("fence.i").contains(i) && fenceIFlushDCache => y + case _ => n + // format: on + } + } + } + + object rfs1 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs1" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fcvt.w.d", "fmsub.d", "fmul.d", "fcvt.wu.d", "feq.d", "fmax.d", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", 
"fsqrt.d", "fclass.d", "fdiv.d", "fmv.x.d", "fcvt.lu.d", "fcvt.l.d", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fclass.s", "fcvt.wu.s", "fmax.s", "feq.s", "fle.s", "fmadd.s", "fsgnj.s", "fadd.s", "flt.s", "fmv.x.w", "fnmadd.s", "fmul.s", "fcvt.w.s", "fsub.s", "fcvt.lu.s", "fcvt.l.s", "feq.h", "fsgnjx.h", "fcvt.w.h", "fcvt.h.s", "fdiv.h", "fclass.h", "fsgnj.h", "fmul.h", "fsub.h", "fcvt.wu.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmv.x.h", "fcvt.s.h", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fle.h", "fcvt.l.h", "fcvt.lu.h").contains(i) => y + case _ => n + // format: on + } + } + } + + object rfs2 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs2" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fmsub.d", "fmul.d", "feq.d", "fmax.d", "fnmadd.d", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fdiv.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fmax.s", "feq.s", "fle.s", "fmadd.s", "fsgnj.s", "fadd.s", "flt.s", "fnmadd.s", "fmul.s", "fsub.s", "feq.h", "fsgnjx.h", "fdiv.h", "fsgnj.h", "fmul.h", "fsub.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fle.h").contains(i) => y + case _ => n + // format: on + } + } + } + + object rfs3 extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "rfs3" + + override def genTable(op: RocketDecodePattern): BitPat = + op.instruction.name match { + // format: off + case i if Seq("fnmsub.d", "fmsub.d", "fnmadd.d", "fmadd.d", "fnmsub.s", "fmsub.s", "fmadd.s", "fnmadd.s", "fmsub.h", "fnmadd.h", "fmadd.h", "fnmsub.h").contains(i) => y + case _ => n + // format: on + } + } + + object wfd extends BoolDecodeField[RocketDecodePattern] { + override def 
name: String = "wfd" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fmin.d", "fsgnj.d", "fnmsub.d", "fadd.d", "fmsub.d", "fld", "fmul.d", "fmax.d", "fcvt.d.wu", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fmadd.d", "fsgnjx.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fcvt.d.w", "fdiv.d", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fmax.s", "fcvt.s.wu", "fmv.w.x", "fmadd.s", "fsgnj.s", "fadd.s", "fnmadd.s", "fcvt.s.w", "flw", "fmul.s", "fsub.s", "fcvt.s.lu", "fcvt.s.l", "fsgnjx.h", "fcvt.h.s", "fdiv.h", "fsgnj.h", "fmul.h", "fsub.h", "flh", "fadd.h", "fmax.h", "fsgnjn.h", "fcvt.s.h", "fcvt.h.wu", "fcvt.h.w", "fmsub.h", "fmin.h", "fsqrt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l").contains(i) => y + case _ => n + // format: on + } + } + + object mul extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "mul" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("mulhsu", "mul", "mulhu", "mulh", "mulw").contains(i) => y + case _ => n + // format: on + } + } + + object div extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "div" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("mulhsu", "mul", "mulhu", "mulh", "mulw").contains(i) && !pipelinedMul => y + case i if Seq("rem", "div", "remu", "divu", "remuw", "divw", "divuw", "remw").contains(i) => y + case _ => n + // format: on + } + } + + object wxd extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "wxd" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + // TODO: filter out rd + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", 
"amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fle.d", "fcvt.w.d", "fcvt.wu.d", "feq.d", "flt.d", "fclass.d", "fmv.x.d", "fcvt.lu.d", "fcvt.l.d", "fclass.s", "fcvt.wu.s", "feq.s", "fle.s", "flt.s", "fmv.x.w", "fcvt.w.s", "fcvt.lu.s", "fcvt.l.s", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hlv.h", "hlv.bu", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "lw", "add", "xor", "andi", "sltiu", "lh", "jalr", "lui", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sll", "srli", "srai", "slli", "ld", "addw", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "feq.h", "fcvt.w.h", "fclass.h", "fcvt.wu.h", "fmv.x.h", "flt.h", "fle.h", "fcvt.l.h", "fcvt.lu.h", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "czero.nez", "czero.eqz").contains(i) => y + case i if Seq("vsetvl", "vsetivli", "vsetvli", "vmv.x.s", "vcpop.m", "vfirst.m").contains(i) => y + case _ => n + // format: on + } + } + + // UOPs + + object UOPMEM extends UOP { + def width = 5 + + def xrd: BitPat = encode("b00000") + + def xwr: BitPat = encode("b00001") + + def pfr: BitPat = encode("b00010") + + def pfw: BitPat = encode("b00011") + + def xaSwap: BitPat = encode("b00100") + + def flushAll: BitPat = encode("b00101") + + def xlr: BitPat = encode("b00110") + + def xsc: BitPat = encode("b00111") + + def xaAdd: BitPat = encode("b01000") + + def xaXor: BitPat = encode("b01001") + + def xaOr: BitPat = encode("b01010") + + def xaAnd: BitPat = encode("b01011") + + def xaMin: BitPat = encode("b01100") + + def xaMax: BitPat = encode("b01101") + + def xaMinu: BitPat = encode("b01110") + + def xaMaxu: BitPat = encode("b01111") + + // TODO: unused + def flush: BitPat = encode("b10000") + + // TODO: unused + 
def pwr: BitPat = encode("b10001") + + // TODO: unused + def produce: BitPat = encode("b10010") + + // TODO: unused + def clean: BitPat = encode("b10011") + + def sfence: BitPat = encode("b10100") + + def hfencev: BitPat = encode("b10101") + + def hfenceg: BitPat = encode("b10110") + + def wok: BitPat = encode("b10111") + + def hlvx: BitPat = encode("b10000") + } + + object memCommand extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "mem_cmd" + + override def genTable(op: RocketDecodePattern): BitPat = { + op.instruction.name match { + // format: off + case i if Seq("fld", "flh", "flw", "hlv.b", "hlv.bu", "hlv.d", "hlv.h", "hlv.hu", "hlv.w", "hlv.wu", "lb", "lbu", "ld", "lh", "lhu", "lw", "lwu").contains(i) => UOPMEM.xrd + case i if Seq("fsd", "fsh", "fsw", "hsv.b", "hsv.d", "hsv.h", "hsv.w", "sb", "sd", "sh", "sw").contains(i) => UOPMEM.xwr + case i if Seq("amoswap.d", "amoswap.w").contains(i) => UOPMEM.xaSwap + case i if Seq("fence.i").contains(i) && fenceIFlushDCache => UOPMEM.flushAll + case i if Seq("lr.d", "lr.w").contains(i) => UOPMEM.xlr + case i if Seq("sc.d", "sc.w").contains(i) => UOPMEM.xsc + case i if Seq("amoadd.d", "amoadd.w").contains(i) => UOPMEM.xaAdd + case i if Seq("amoxor.d", "amoxor.w").contains(i) => UOPMEM.xaXor + case i if Seq("amoor.d", "amoor.w").contains(i) => UOPMEM.xaOr + case i if Seq("amoand.d", "amoand.w").contains(i) => UOPMEM.xaAnd + case i if Seq("amomin.d", "amomin.w").contains(i) => UOPMEM.xaMin + case i if Seq("amomax.d", "amomax.w").contains(i) => UOPMEM.xaMax + case i if Seq("amominu.d", "amominu.w").contains(i) => UOPMEM.xaMinu + case i if Seq("amomaxu.d", "amomaxu.w").contains(i) => UOPMEM.xaMaxu + case i if Seq("sfence.vma").contains(i) => UOPMEM.sfence + case i if Seq("hfence.vvma").contains(i) => UOPMEM.hfencev + case i if Seq("hfence.gvma").contains(i) => UOPMEM.hfenceg + case i if Seq("hlvx.hu", "hlvx.wu").contains(i) => UOPMEM.hlvx + case _ => UOPMEM.dontCare + // format: on + } + } + + 
override def uopType: UOPMEM.type = UOPMEM + } + + object UOPCSR extends UOP { + def width = 3 + + def n: BitPat = encode(0) + + def r: BitPat = encode(2) + + def i: BitPat = encode(4) + + def w: BitPat = encode(5) + + def s: BitPat = encode(6) + + def c: BitPat = encode(7) + } + + object csr extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "csr" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + // TODO: default should be N? + case i if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fmin.d", "fsgnj.d", "fle.d", "fnmsub.d", "fadd.d", "fcvt.w.d", "fmsub.d", "fld", "fmul.d", "fcvt.wu.d", "feq.d", "fmax.d", "fcvt.d.wu", "fnmadd.d", "fcvt.d.s", "fcvt.s.d", "fsd", "fmadd.d", "fsgnjx.d", "flt.d", "fsgnjn.d", "fsub.d", "fsqrt.d", "fclass.d", "fcvt.d.w", "fdiv.d", "fcvt.d.lu", "fmv.x.d", "fmv.d.x", "fcvt.lu.d", "fcvt.l.d", "fcvt.d.l", "fcvt.d.h", "fcvt.h.d", "fnmsub.s", "fsgnjx.s", "fmsub.s", "fsgnjn.s", "fdiv.s", "fmin.s", "fsqrt.s", "fclass.s", "fcvt.wu.s", "fmax.s", "feq.s", "fcvt.s.wu", "fmv.w.x", "fle.s", "fmadd.s", "fsgnj.s", "fadd.s", "fsw", "flt.s", "fmv.x.w", "fnmadd.s", "fcvt.s.w", "flw", "fmul.s", "fcvt.w.s", "fsub.s", "fcvt.lu.s", "fcvt.s.lu", "fcvt.l.s", "fcvt.s.l", "or", "srl", "fence", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "lui", "bne", "lbu", "sub", "and", "auipc", "xori", "slti", "slt", "addi", "lb", "jal", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "feq.h", "fsgnjx.h", "fcvt.w.h", 
"fcvt.h.s", "fdiv.h", "fclass.h", "fsh", "fsgnj.h", "fmul.h", "fsub.h", "flh", "fcvt.wu.h", "fadd.h", "fmax.h", "fsgnjn.h", "fmv.x.h", "fcvt.s.h", "fcvt.h.wu", "fcvt.h.w", "fmsub.h", "fmin.h", "fsqrt.h", "flt.h", "fnmadd.h", "fmadd.h", "fnmsub.h", "fmv.h.x", "fle.h", "fcvt.l.h", "fcvt.lu.h", "fcvt.h.lu", "fcvt.h.l", "fence.i", "czero.nez", "czero.eqz").contains(i) => UOPCSR.n + case i if Seq("cdiscard.d.l1", "cease", "cflush.d.l1", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "ebreak", "ecall", "sret", "sfence.vma", "dret", "wfi", "mret", "mnret").contains(i) => UOPCSR.i + case i if Seq("csrrw", "csrrwi").contains(i) => UOPCSR.w + case i if Seq("csrrs", "csrrsi").contains(i) => UOPCSR.s + case i if Seq("csrrc", "csrrci").contains(i) => UOPCSR.c + case _ => UOPCSR.dontCare + // format: on + } + + override def uopType: UOPCSR.type = UOPCSR + } + + object UOPALU extends UOP { + def width = 4 + + def add: BitPat = encode(0) + + def sl: BitPat = encode(1) + + def seq: BitPat = encode(2) + + def sne: BitPat = encode(3) + + def xor: BitPat = encode(4) + + def sr: BitPat = encode(5) + + def or: BitPat = encode(6) + + def and: BitPat = encode(7) + + def czeqz: BitPat = encode(8) + + def cznez: BitPat = encode(9) + + def sub: BitPat = encode(10) + + def sra: BitPat = encode(11) + + def slt: BitPat = encode(12) + + def sge: BitPat = encode(13) + + def sltu: BitPat = encode(14) + + def sgeu: BitPat = encode(15) + + def div: BitPat = xor + + def divu: BitPat = sr + + def rem: BitPat = or + + def remu: BitPat = and + + def mul: BitPat = add + + def mulh: BitPat = sl + + def mulhsu: BitPat = seq + + def mulhu: BitPat = sne + } + + object aluFn extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "alu_fn" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("amomaxu.w", 
"amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fsd", "fsw", "flw", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "lhu", "sb", "lw", "add", "sw", "lh", "jalr", "lui", "lbu", "auipc", "addi", "lb", "jal", "sh", "ld", "addw", "sd", "lwu", "addiw", "sfence.vma", "fsh", "flh", "csrrc", "csrrci", "csrrs", "csrrw", "csrrsi", "csrrwi", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPALU.add + case (i, _) if Seq("and", "andi").contains(i) => UOPALU.and + case (i, _) if Seq("or", "ori").contains(i) => UOPALU.or + case (i, _) if Seq("beq").contains(i) => UOPALU.seq + case (i, _) if Seq("bge").contains(i) => UOPALU.sge + case (i, _) if Seq("bgeu").contains(i) => UOPALU.sgeu + case (i, _) if Seq("sll", "slli", "slli", "slliw", "sllw").contains(i) => UOPALU.sl + case (i, _) if Seq("blt", "slt", "slti").contains(i) => UOPALU.slt + case (i, _) if Seq("bltu", "sltiu", "sltu").contains(i) => UOPALU.sltu + case (i, _) if Seq("bne").contains(i) => UOPALU.sne + case (i, _) if Seq("srl", "srli", "srli", "srliw", "srlw").contains(i) => UOPALU.sr + case (i, _) if Seq("sra", "srai", "srai", "sraiw", "sraw").contains(i) => UOPALU.sra + case (i, _) if Seq("sub", "subw").contains(i) => UOPALU.sub + case (i, _) if Seq("xor", "xori").contains(i) => UOPALU.xor + + // rv_m + case (i, _) if Seq("mul", "mulw").contains(i) => UOPALU.mul + case (i, _) if Seq("mulh").contains(i) => UOPALU.mulh + case (i, _) if Seq("mulhu").contains(i) => UOPALU.mulhu + case (i, _) if Seq("mulhsu").contains(i) => UOPALU.mulhsu + case (i, _) if Seq("div", "divw").contains(i) => UOPALU.div + case (i, _) if Seq("divu", "divuw").contains(i) => UOPALU.divu + case (i, _) if Seq("rem", "remw").contains(i) => UOPALU.rem + 
case (i, _) if Seq("remu", "remuw").contains(i) => UOPALU.remu + + case (i, _) if Seq("czero.eqz").contains(i) => UOPALU.czeqz + case (i, _) if Seq("czero.nez").contains(i) => UOPALU.cznez + // vector + // 7. Vector read RS1 go through ALU rs1 + 0. + case (_, p) if p.vectorReadRs1 => UOPALU.add + case _ => UOPALU.dontCare + // format: on + } + + override def uopType: UOPALU.type = UOPALU + } + + object UOPIMM extends UOP { + def width = 3 + + def s: BitPat = encode(0) + + def sb: BitPat = encode(1) + + def u: BitPat = encode(2) + + def uj: BitPat = encode(3) + + def i: BitPat = encode(4) + + def z: BitPat = encode(5) + } + + object selImm extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_imm" + + override def genTable(op: RocketDecodePattern): BitPat = op.instruction.name match { + // format: off + case i if Seq("fld", "flw", "hsv.w", "hsv.b", "hsv.h", "hsv.d", "ori", "lhu", "lw", "andi", "sltiu", "lh", "jalr", "lbu", "xori", "slti", "addi", "lb", "srli", "srai", "slli", "ld", "sraiw", "lwu", "addiw", "srliw", "slliw", "flh").contains(i) => UOPIMM.i + case i if Seq("fsd", "fsh", "fsw", "sb", "sd", "sh", "sw").contains(i) => UOPIMM.s + case i if Seq("beq", "bge", "bgeu", "blt", "bltu", "bne").contains(i) => UOPIMM.sb + case i if Seq("auipc", "lui").contains(i) => UOPIMM.u + case i if Seq("jal").contains(i) => UOPIMM.uj + case i if Seq("csrrci", "csrrsi", "csrrwi").contains(i) => UOPIMM.z + case _ => UOPIMM.dontCare + // format: on + } + + override def uopType: UOPIMM.type = UOPIMM + } + + object UOPA1 extends UOP { + def width = 2 + + def zero: BitPat = encode(0) + + def rs1: BitPat = encode(1) + + def pc: BitPat = encode(2) + } + + object selAlu1 extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_alu1" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("auipc", "jal").contains(i) => UOPA1.pc + case (i, _) if Seq("amomaxu.w", 
"amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "fld", "fcvt.d.wu", "fsd", "fcvt.d.w", "fcvt.d.lu", "fmv.d.x", "fcvt.d.l", "fcvt.s.wu", "fmv.w.x", "fsw", "fcvt.s.w", "flw", "fcvt.s.lu", "fcvt.s.l", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "or", "srl", "ori", "lhu", "sltu", "sra", "sb", "lw", "add", "xor", "beq", "andi", "bge", "sw", "blt", "bgeu", "sltiu", "lh", "bltu", "jalr", "bne", "lbu", "sub", "and", "xori", "slti", "slt", "addi", "lb", "sh", "sll", "srli", "srai", "slli", "ld", "addw", "sd", "sraiw", "lwu", "sllw", "sraw", "subw", "srlw", "addiw", "srliw", "slliw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "sfence.vma", "fsh", "flh", "fcvt.h.wu", "fcvt.h.w", "fmv.h.x", "fcvt.h.lu", "fcvt.h.l", "csrrc", "csrrs", "csrrw", "czero.nez", "czero.eqz", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPA1.rs1 + case (_, p) if p.vectorReadRs1 => UOPA1.rs1 + case (i, _) if Seq("csrrci", "csrrsi", "csrrwi", "lui").contains(i) => UOPA1.zero + case _ => UOPA1.dontCare + } + + override def uopType: UOPA1.type = UOPA1 + } + + object UOPA2 extends UOP { + def width = 2 + + def zero: BitPat = encode(0) + + def size: BitPat = encode(1) + + def rs2: BitPat = encode(2) + + def imm: BitPat = encode(3) + } + + object selAlu2 extends UOPDecodeField[RocketDecodePattern] { + override def name: String = "sel_alu2" + + override def genTable(op: RocketDecodePattern): BitPat = (op.instruction.name, op) match { + // format: off + case (i, _) if Seq("fld", "fsd", "fsw", "flw", "ori", "lhu", "sb", "lw", "andi", "sw", "sltiu", "lh", "jalr", "lui", "lbu", "auipc", "xori", "slti", "addi", "lb", "sh", "srli", "srai", "slli", "ld", 
"sd", "sraiw", "lwu", "addiw", "srliw", "slliw", "fsh", "flh", "csrrci", "csrrsi", "csrrwi").contains(i) => UOPA2.imm + case (i, _) if Seq("or", "srl", "sltu", "sra", "add", "xor", "beq", "bge", "blt", "bgeu", "bltu", "bne", "sub", "and", "slt", "sll", "addw", "sllw", "sraw", "subw", "srlw", "mulhsu", "rem", "div", "mul", "mulhu", "mulh", "remu", "divu", "remuw", "divw", "divuw", "mulw", "remw", "czero.nez", "czero.eqz").contains(i) => UOPA2.rs2 + case (i, _) if Seq("jal").contains(i) => UOPA2.size + case (i, _) if Seq("amomaxu.w", "amoand.w", "amoor.w", "amoxor.w", "amoswap.w", "lr.w", "amomax.w", "amoadd.w", "amomin.w", "amominu.w", "sc.w", "lr.d", "amomax.d", "amoswap.d", "amoxor.d", "amoand.d", "amomin.d", "amoor.d", "amoadd.d", "amomaxu.d", "amominu.d", "sc.d", "hsv.w", "hsv.b", "hfence.vvma", "hlv.hu", "hlvx.hu", "hlv.b", "hlvx.wu", "hlv.w", "hsv.h", "hlv.h", "hlv.bu", "hfence.gvma", "hsv.d", "hlv.d", "hlv.wu", "sfence.vma", "csrrc", "csrrs", "csrrw", "cdiscard.d.l1", "cflush.d.l1").contains(i) => UOPA2.zero + case (_, p) if p.vectorReadRs1 => UOPA2.zero + case _ => UOPA2.dontCare + } + + override def uopType: UOPA2.type = UOPA2 + } + + object vector extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vector" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.instruction.instructionSet.name == "rv_v") Y else N + } + + object vectorLSU extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vectorLSU" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.isVectorLSU) Y else N + } + + object vectorCSR extends BoolDecodeField[RocketDecodePattern] { + override def name: String = "vectorCSR" + + override def genTable(op: RocketDecodePattern): BitPat = if (op.isVectorCSR) Y else N + } +} + +class DecoderInterface(parameter: DecoderParameter) extends Bundle { + val instruction = Input(UInt(32.W)) + val output = Output(parameter.table.bundle) +} + +/** DecodePattern for an RISC-V 
instruction */ +case class RocketDecodePattern(instruction: Instruction) extends DecodePattern { + override def bitPat: BitPat = BitPat("b" + instruction.encoding.toString) + def isVector = instruction.instructionSet.name == "rv_v" + def isVectorCSR = Seq("vsetvl", "vsetivli", "vsetvli").contains(instruction.name) + def isVectorLSU = instruction.name match { + // unit stride + // load/store(t) sz element + case s"v${t}e${sz}.v" if (t == "l") || (t == "s") => true + // alias to vl(s)e1.v + case s"v${t}m.v" if (t == "l") || (t == "s") => true + // load/store(t) element w/ first fault + case s"v${t}e${sz}ff.v" if (t == "l") || (t == "s") => true + // load/store(t) r registers with VLEN/sz bytes + case s"v${tr}re${sz}.v" if tr.startsWith("l") || tr.startsWith("s") => true + // alias to vl(s)szr.v + case s"v${tsz}r.v" if tsz.startsWith("l") || tsz.startsWith("s") => true + // stride + case s"v${t}se${sz}.v" if (t == "l") || (t == "s") => true + // indexed + case s"v${to}xei${sz}.v" if (to == "lo" || to == "lu" || to == "so" || to == "su") => true + case _ => false + } + // todo: unsure. 
+ def vectorReadRs1: Boolean = isVectorLSU || (instruction.name match { + // vx type + case s"v${op}.vx" => true + case s"v${op}.v.x" => true + // set vl + case s"vsetvl${i}" => true + case _ => false + }) + def vectorReadRs2 = instruction.name match { + // set vl + case s"vsetvl" => true + // stride + case s"v${t}se${sz}.v" if (t == "l") || (t == "s") => true + case _ => false + } +} + +@instantiable +class Decoder(val parameter: DecoderParameter) + extends FixedIORawModule(new DecoderInterface(parameter)) + with SerializableModule[DecoderParameter] { + io.output := parameter.table.decode(io.instruction) +} From a865ee0830c10eab87292b51ff352350ce04447e Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 15:00:03 +0800 Subject: [PATCH 041/140] [rocketv] add elaborator for Decoder - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Decoder config --pipelinedMul false --fenceIFlushDCache false --instructionSets rv32_i --instructionSets rv_v - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Decoder design --parameter ./Decoder.json --run-firtool --- elaborator/src/rocketv/Decoder.scala | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 elaborator/src/rocketv/Decoder.scala diff --git a/elaborator/src/rocketv/Decoder.scala b/elaborator/src/rocketv/Decoder.scala new file mode 100644 index 000000000..29f305d7e --- /dev/null +++ b/elaborator/src/rocketv/Decoder.scala @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{Decoder, DecoderParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Decoder extends Elaborator { + @main + case class DecoderParameterMain( + @arg(name = "instructionSets") instructionSets: Set[String], + @arg(name = "pipelinedMul") pipelinedMul: Boolean, + @arg(name 
= "fenceIFlushDCache") fenceIFlushDCache: Boolean) { + def convert: DecoderParameter = DecoderParameter( + instructionSets, + pipelinedMul, + fenceIFlushDCache + ) + } + + implicit def DecoderParameterMainParser: ParserForClass[DecoderParameterMain] = ParserForClass[DecoderParameterMain] + + @main + def config(@arg(name = "parameter") parameter: DecoderParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Decoder, DecoderParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From e67243a648b3e68e10b298f26687955c1b336e89 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 22:51:47 +0800 Subject: [PATCH 042/140] [rocketv] copy RVCExpander into rocketv project --- rocketv/src/RVCExpander.scala | 172 ++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 rocketv/src/RVCExpander.scala diff --git a/rocketv/src/RVCExpander.scala b/rocketv/src/RVCExpander.scala new file mode 100644 index 000000000..c62430cb7 --- /dev/null +++ b/rocketv/src/RVCExpander.scala @@ -0,0 +1,172 @@ +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util._ +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ + +class ExpandedInstruction extends Bundle { + val bits = UInt(32.W) + val rd = UInt(5.W) + val rs1 = UInt(5.W) + val rs2 = UInt(5.W) + val rs3 = UInt(5.W) +} + +class RVCDecoder(x: UInt, xLen: Int, useAddiForMv: Boolean = false) { + def inst(bits: UInt, rd: UInt = x(11, 7), rs1: UInt = x(19, 15), rs2: UInt = x(24, 20), rs3: UInt = x(31, 27)) = { + val res = Wire(new ExpandedInstruction) + res.bits := bits + res.rd := rd + res.rs1 := rs1 + res.rs2 := rs2 + res.rs3 := rs3 + res + } + + def rs1p = Cat(1.U(2.W), x(9, 7)) + def rs2p = Cat(1.U(2.W), x(4, 2)) + def rs2 = x(6, 2) + def rd = x(11, 7) + def addi4spnImm = Cat(x(10, 7), x(12, 11), x(5), x(6), 0.U(2.W)) + def lwImm = Cat(x(5), x(12, 10), x(6), 0.U(2.W)) + def ldImm = Cat(x(6, 5), x(12, 10), 0.U(3.W)) + def lwspImm = Cat(x(3, 2), x(12), x(6, 4), 0.U(2.W)) + def ldspImm = Cat(x(4, 2), x(12), x(6, 5), 0.U(3.W)) + def swspImm = Cat(x(8, 7), x(12, 9), 0.U(2.W)) + def sdspImm = Cat(x(9, 7), x(12, 10), 0.U(3.W)) + def luiImm = Cat(Fill(15, x(12)), x(6, 2), 0.U(12.W)) + def addi16spImm = Cat(Fill(3, x(12)), x(4, 3), x(5), x(2), x(6), 0.U(4.W)) + def addiImm = Cat(Fill(7, x(12)), x(6, 2)) + def jImm = Cat(Fill(10, x(12)), x(8), x(10, 9), x(6), x(7), x(2), x(11), x(5, 3), 0.U(1.W)) + def bImm = Cat(Fill(5, x(12)), x(6, 5), x(2), x(11, 10), x(4, 3), 0.U(1.W)) + def shamt = Cat(x(12), x(6, 2)) + def x0 = 0.U(5.W) + def ra = 1.U(5.W) + def sp = 2.U(5.W) + + def q0 = { + def addi4spn = { + val opc = Mux(x(12, 5).orR, 0x13.U(7.W), 0x1f.U(7.W)) + inst(Cat(addi4spnImm, sp, 0.U(3.W), rs2p, opc), rs2p, sp, rs2p) + } + def ld = inst(Cat(ldImm, rs1p, 3.U(3.W), rs2p, 0x03.U(7.W)), rs2p, rs1p, rs2p) + def lw = inst(Cat(lwImm, rs1p, 2.U(3.W), rs2p, 0x03.U(7.W)), rs2p, rs1p, rs2p) + def fld = inst(Cat(ldImm, rs1p, 3.U(3.W), rs2p, 
0x07.U(7.W)), rs2p, rs1p, rs2p) + def flw = { + if (xLen == 32) inst(Cat(lwImm, rs1p, 2.U(3.W), rs2p, 0x07.U(7.W)), rs2p, rs1p, rs2p) + else ld + } + def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x3f.U(7.W)), rs2p, rs1p, rs2p) + def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, 3.U(3.W), ldImm(4, 0), 0x23.U(7.W)), rs2p, rs1p, rs2p) + def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x23.U(7.W)), rs2p, rs1p, rs2p) + def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, 3.U(3.W), ldImm(4, 0), 0x27.U(7.W)), rs2p, rs1p, rs2p) + def fsw = { + if (xLen == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, 2.U(3.W), lwImm(4, 0), 0x27.U(7.W)), rs2p, rs1p, rs2p) + else sd + } + Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw) + } + + def q1 = { + def addi = inst(Cat(addiImm, rd, 0.U(3.W), rd, 0x13.U(7.W)), rd, rd, rs2p) + def addiw = { + val opc = Mux(rd.orR, 0x1b.U(7.W), 0x1f.U(7.W)) + inst(Cat(addiImm, rd, 0.U(3.W), rd, opc), rd, rd, rs2p) + } + def jal = { + if (xLen == 32) inst(Cat(jImm(20), jImm(10, 1), jImm(11), jImm(19, 12), ra, 0x6f.U(7.W)), ra, rd, rs2p) + else addiw + } + def li = inst(Cat(addiImm, x0, 0.U(3.W), rd, 0x13.U(7.W)), rd, x0, rs2p) + def addi16sp = { + val opc = Mux(addiImm.orR, 0x13.U(7.W), 0x1f.U(7.W)) + inst(Cat(addi16spImm, rd, 0.U(3.W), rd, opc), rd, rd, rs2p) + } + def lui = { + val opc = Mux(addiImm.orR, 0x37.U(7.W), 0x3f.U(7.W)) + val me = inst(Cat(luiImm(31, 12), rd, opc), rd, rd, rs2p) + Mux(rd === x0 || rd === sp, addi16sp, me) + } + def j = inst(Cat(jImm(20), jImm(10, 1), jImm(11), jImm(19, 12), x0, 0x6f.U(7.W)), x0, rs1p, rs2p) + def beqz = inst(Cat(bImm(12), bImm(10, 5), x0, rs1p, 0.U(3.W), bImm(4, 1), bImm(11), 0x63.U(7.W)), rs1p, rs1p, x0) + def bnez = inst(Cat(bImm(12), bImm(10, 5), x0, rs1p, 1.U(3.W), bImm(4, 1), bImm(11), 0x63.U(7.W)), x0, rs1p, x0) + def arith = { + def srli = Cat(shamt, rs1p, 5.U(3.W), rs1p, 0x13.U(7.W)) + def srai = srli | (1 << 30).U + def andi = Cat(addiImm, rs1p, 7.U(3.W), rs1p, 0x13.U(7.W)) + def rtype = 
{ + val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6, 5))) + val sub = Mux(x(6, 5) === 0.U, (1 << 30).U, 0.U) + val opc = Mux(x(12), 0x3b.U(7.W), 0x33.U(7.W)) + Cat(rs2p, rs1p, funct, rs1p, opc) | sub + } + inst(Seq(srli, srai, andi, rtype)(x(11, 10)), rs1p, rs1p, rs2p) + } + Seq(addi, jal, li, lui, arith, j, beqz, bnez) + } + + def q2 = { + val load_opc = Mux(rd.orR, 0x03.U(7.W), 0x1f.U(7.W)) + def slli = inst(Cat(shamt, rd, 1.U(3.W), rd, 0x13.U(7.W)), rd, rd, rs2) + def ldsp = inst(Cat(ldspImm, sp, 3.U(3.W), rd, load_opc), rd, sp, rs2) + def lwsp = inst(Cat(lwspImm, sp, 2.U(3.W), rd, load_opc), rd, sp, rs2) + def fldsp = inst(Cat(ldspImm, sp, 3.U(3.W), rd, 0x07.U(7.W)), rd, sp, rs2) + def flwsp = { + if (xLen == 32) inst(Cat(lwspImm, sp, 2.U(3.W), rd, 0x07.U(7.W)), rd, sp, rs2) + else ldsp + } + def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, 3.U(3.W), sdspImm(4, 0), 0x23.U(7.W)), rd, sp, rs2) + def swsp = inst(Cat(swspImm >> 5, rs2, sp, 2.U(3.W), swspImm(4, 0), 0x23.U(7.W)), rd, sp, rs2) + def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, 3.U(3.W), sdspImm(4, 0), 0x27.U(7.W)), rd, sp, rs2) + def fswsp = { + if (xLen == 32) inst(Cat(swspImm >> 5, rs2, sp, 2.U(3.W), swspImm(4, 0), 0x27.U(7.W)), rd, sp, rs2) + else sdsp + } + def jalr = { + val mv = { + if (useAddiForMv) inst(Cat(rs2, 0.U(3.W), rd, 0x13.U(7.W)), rd, rs2, x0) + else inst(Cat(rs2, x0, 0.U(3.W), rd, 0x33.U(7.W)), rd, x0, rs2) + } + val add = inst(Cat(rs2, rd, 0.U(3.W), rd, 0x33.U(7.W)), rd, rd, rs2) + val jr = Cat(rs2, rd, 0.U(3.W), x0, 0x67.U(7.W)) + val reserved = Cat(jr >> 7, 0x1f.U(7.W)) + val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2) + val jr_mv = Mux(rs2.orR, mv, jr_reserved) + val jalr = Cat(rs2, rd, 0.U(3.W), ra, 0x67.U(7.W)) + val ebreak = Cat(jr >> 7, 0x73.U(7.W)) | (1 << 20).U + val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2) + val jalr_add = Mux(rs2.orR, add, jalr_ebreak) + Mux(x(12), jalr_add, jr_mv) + } + Seq(slli, fldsp, lwsp, flwsp, jalr, 
fsdsp, swsp, fswsp) + } + + def q3 = Seq.fill(8)(passthrough) + + def passthrough = inst(x) + + def decode = { + val s = q0 ++ q1 ++ q2 ++ q3 + s(Cat(x(1, 0), x(15, 13))) + } +} + +class RVCExpander(useAddiForMv: Boolean = false)(implicit val p: Parameters) extends Module with HasCoreParameters { + val io = IO(new Bundle { + val in = Input(UInt(32.W)) + val out = Output(new ExpandedInstruction) + val rvc = Output(Bool()) + }) + + if (usingCompressed) { + io.rvc := io.in(1, 0) =/= 3.U + io.out := new RVCDecoder(io.in, p(XLen), useAddiForMv).decode + } else { + io.rvc := false.B + io.out := new RVCDecoder(io.in, p(XLen), useAddiForMv).passthrough + } +} From 6514bb03433517d7c5edbe33be50da5b77d9c973 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 23:04:58 +0800 Subject: [PATCH 043/140] [rocketv] migrate RVCExpander --- rocketv/src/Bundle.scala | 7 ++++ rocketv/src/RVCExpander.scala | 60 ++++++++++++++++++++--------------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 33f128b93..bdbf0ef36 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -529,3 +529,10 @@ class MNStatus extends Bundle { val zero1 = UInt(3.W) } +class ExpandedInstruction extends Bundle { + val bits = UInt(32.W) + val rd = UInt(5.W) + val rs1 = UInt(5.W) + val rs2 = UInt(5.W) + val rs3 = UInt(5.W) +} diff --git a/rocketv/src/RVCExpander.scala b/rocketv/src/RVCExpander.scala index c62430cb7..b0e5b0795 100644 --- a/rocketv/src/RVCExpander.scala +++ b/rocketv/src/RVCExpander.scala @@ -1,21 +1,14 @@ -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util._ -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ - -class ExpandedInstruction extends Bundle { - val bits = UInt(32.W) - val rd = UInt(5.W) - val rs1 = UInt(5.W) - val rs2 = UInt(5.W) - val rs3 = UInt(5.W) -} +// TODO: add a clear documentation on this... class RVCDecoder(x: UInt, xLen: Int, useAddiForMv: Boolean = false) { def inst(bits: UInt, rd: UInt = x(11, 7), rs1: UInt = x(19, 15), rs2: UInt = x(24, 20), rs3: UInt = x(31, 27)) = { val res = Wire(new ExpandedInstruction) @@ -99,12 +92,12 @@ class RVCDecoder(x: UInt, xLen: Int, useAddiForMv: Boolean = false) { def srai = srli | (1 << 30).U def andi = Cat(addiImm, rs1p, 7.U(3.W), rs1p, 0x13.U(7.W)) def rtype = { - val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6, 5))) + val funct = VecInit(Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U))(Cat(x(12), x(6, 5))) val sub = Mux(x(6, 5) === 0.U, (1 << 30).U, 0.U) val opc = Mux(x(12), 0x3b.U(7.W), 0x33.U(7.W)) Cat(rs2p, rs1p, funct, rs1p, opc) | sub } - inst(Seq(srli, srai, andi, rtype)(x(11, 10)), rs1p, rs1p, rs2p) + inst(VecInit(Seq(srli, srai, andi, rtype))(x(11, 10)), rs1p, rs1p, rs2p) } Seq(addi, jal, li, lui, arith, j, beqz, bnez) } @@ -151,22 +144,39 @@ class RVCDecoder(x: UInt, xLen: Int, useAddiForMv: Boolean = false) { def decode = { val s = q0 ++ q1 ++ q2 ++ q3 - s(Cat(x(1, 0), x(15, 13))) + VecInit(s)(Cat(x(1, 0), x(15, 13))) } } -class RVCExpander(useAddiForMv: Boolean = false)(implicit val p: Parameters) extends Module with HasCoreParameters { - val io = IO(new Bundle { - val in = 
Input(UInt(32.W)) - val out = Output(new ExpandedInstruction) - val rvc = Output(Bool()) - }) +object RVCExpanderParameter { + implicit def rwP: upickle.default.ReadWriter[RVCExpanderParameter] = upickle.default.macroRW[RVCExpanderParameter] +} + +case class RVCExpanderParameter( + xLen: Int, + usingCompressed: Boolean) + extends SerializableModuleParameter { + val useAddiForMv: Boolean = false +} + +class RVCExpanderInterface(parameter: RVCExpanderParameter) extends Bundle { + val in = Input(UInt(32.W)) + val out = Output(new ExpandedInstruction) + val rvc = Output(Bool()) +} +@instantiable +class RVCExpander(val parameter: RVCExpanderParameter) + extends FixedIORawModule(new RVCExpanderInterface(parameter)) + with SerializableModule[RVCExpanderParameter] { + val usingCompressed = parameter.usingCompressed + val useAddiForMv = parameter.useAddiForMv + val xLen = parameter.xLen if (usingCompressed) { io.rvc := io.in(1, 0) =/= 3.U - io.out := new RVCDecoder(io.in, p(XLen), useAddiForMv).decode + io.out := new RVCDecoder(io.in, xLen, useAddiForMv).decode } else { io.rvc := false.B - io.out := new RVCDecoder(io.in, p(XLen), useAddiForMv).passthrough + io.out := new RVCDecoder(io.in, xLen, useAddiForMv).passthrough } } From d9a3f2ed5fea3331d514f1d873b53f86b62cb61c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 23:10:32 +0800 Subject: [PATCH 044/140] [rocketv] add elaborator for RVCExpander - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RVCExpander config --xLen 32 --usingCompressed true - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RVCExpander design --parameter ./RVCExpander.json --run-firtool --- elaborator/src/rocketv/RVCExpander.scala | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 elaborator/src/rocketv/RVCExpander.scala diff --git a/elaborator/src/rocketv/RVCExpander.scala b/elaborator/src/rocketv/RVCExpander.scala new file mode 
100644 index 000000000..2cf3aa7df --- /dev/null +++ b/elaborator/src/rocketv/RVCExpander.scala @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{RVCExpander, RVCExpanderParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object RVCExpander extends Elaborator { + @main + case class RVCExpanderParameterMain( + @arg(name = "xLen") xLen: Int, + @arg(name = "usingCompressed") usingCompressed: Boolean) { + def convert: RVCExpanderParameter = RVCExpanderParameter( + xLen, + usingCompressed + ) + } + + implicit def RVCExpanderParameterMainParser: ParserForClass[RVCExpanderParameterMain] = + ParserForClass[RVCExpanderParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RVCExpanderParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[RVCExpander, RVCExpanderParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 1bf968b6cf2bf1ca48c10dcb9c09654e8fedb77f Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sat, 29 Jun 2024 23:16:27 +0800 Subject: [PATCH 045/140] [rocketv] copy IBuf into rocketv project --- rocketv/src/IBuf.scala | 139 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 rocketv/src/IBuf.scala diff --git a/rocketv/src/IBuf.scala b/rocketv/src/IBuf.scala new file mode 100644 index 000000000..3d70b1d57 --- /dev/null +++ b/rocketv/src/IBuf.scala @@ -0,0 +1,139 @@ +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{log2Ceil, Cat, Decoupled, Fill, UIntToOH} +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ + +class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters { + val xcpt0 = new FrontendExceptions // exceptions on first half of instruction + val xcpt1 = new FrontendExceptions // exceptions on second half of instruction + val replay = Bool() + val rvc = Bool() + val inst = new ExpandedInstruction + val raw = UInt(32.W) + require(coreInstBits == (if (usingCompressed) 16 else 32)) +} + +/** handle Cext. */ +class IBuf(implicit p: Parameters) extends CoreModule { + val io = IO(new Bundle { + // 3. Frontend fetched data will input to here. + val imem = Flipped(Decoupled(new FrontendResp)) + val kill = Input(Bool()) + val pc = Output(UInt(vaddrBitsExtended.W)) + val btb_resp = Output(new BTBResp()) + // 4. Give out the instruction to Decode. 
+ val inst = Vec(retireWidth, Decoupled(new Instruction)) + }) + + // This module is meant to be more general, but it's not there yet + require(decodeWidth == 1) + + val n = fetchWidth - 1 + val nBufValid = if (n == 0) 0.U else RegInit(init = 0.U(log2Ceil(fetchWidth).W)) + val buf = Reg(chiselTypeOf(io.imem.bits)) + val ibufBTBResp = Reg(new BTBResp) + val pcWordMask = (coreInstBytes * fetchWidth - 1).U(vaddrBitsExtended.W) + + val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth * coreInstBytes) - 1, log2Ceil(coreInstBytes)) + val nReady = WireDefault(0.U(log2Ceil(fetchWidth + 1).W)) + val nIC = Mux(io.imem.bits.btb.taken, io.imem.bits.btb.bridx +& 1.U, fetchWidth.U) - pcWordBits + val nICReady = nReady - nBufValid + val nValid = Mux(io.imem.valid, nIC, 0.U) + nBufValid + io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n.U >= nIC - nICReady) + + if (n > 0) { + when(io.inst(0).ready) { + nBufValid := Mux(nReady >== nBufValid, 0.U, nBufValid - nReady) + if (n > 1) when(nReady > 0.U && nReady < nBufValid) { + val shiftedBuf = + shiftInsnRight(buf.data(n * coreInstBits - 1, coreInstBits), (nReady - 1.U)(log2Ceil(n - 1) - 1, 0)) + buf.data := Cat( + buf.data(n * coreInstBits - 1, (n - 1) * coreInstBits), + shiftedBuf((n - 1) * coreInstBits - 1, 0) + ) + buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask + } + when(io.imem.valid && nReady >= nBufValid && nICReady < nIC && n.U >= nIC - nICReady) { + val shamt = pcWordBits + nICReady + nBufValid := nIC - nICReady + buf := io.imem.bits + buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n * coreInstBits - 1, 0) + buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask + ibufBTBResp := io.imem.bits.btb + } + } + when(io.kill) { + nBufValid := 0.U + } + } + + val icShiftAmt = (fetchWidth.U + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0) + val icData = + 
shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits - 1, 0))), icShiftAmt) + .extract(3 * fetchWidth * coreInstBits - 1, 2 * fetchWidth * coreInstBits) + val icMask = + (~0.U((fetchWidth * coreInstBits).W) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth * coreInstBits - 1, 0) + val inst = icData & icMask | buf.data & ~icMask + + val valid = (UIntToOH(nValid) - 1.U)(fetchWidth - 1, 0) + val bufMask = UIntToOH(nBufValid) - 1.U + val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt)) + val buf_replay = Mux(buf.replay, bufMask, 0.U) + val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, 0.U) + assert(!io.imem.valid || !io.imem.bits.btb.taken || io.imem.bits.btb.bridx >= pcWordBits) + + io.btb_resp := io.imem.bits.btb + io.pc := Mux(nBufValid > 0.U, buf.pc, io.imem.bits.pc) + expand(0, 0.U, inst) + + def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) { + val exp = Module(new RVCExpander) + exp.io.in := curInst + io.inst(i).bits.inst := exp.io.out + io.inst(i).bits.raw := curInst + + if (usingCompressed) { + val replay = ic_replay(j) || (!exp.io.rvc && ic_replay(j + 1.U)) + val full_insn = exp.io.rvc || valid(j + 1.U) || buf_replay(j) + io.inst(i).valid := valid(j) && full_insn + io.inst(i).bits.xcpt0 := xcpt(j) + io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j + 1.U).asUInt).asTypeOf(new FrontendExceptions) + io.inst(i).bits.replay := replay + io.inst(i).bits.rvc := exp.io.rvc + + when((bufMask(j) && exp.io.rvc) || bufMask(j + 1.U)) { io.btb_resp := ibufBTBResp } + + when(full_insn && ((i == 0).B || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j + 1.U, j + 2.U) } + + expand(i + 1, Mux(exp.io.rvc, j + 1.U, j + 2.U), Mux(exp.io.rvc, curInst >> 16, curInst >> 32)) + } else { + when((i == 0).B || io.inst(i).ready) { nReady := (i + 1).U } + io.inst(i).valid := valid(i) + io.inst(i).bits.xcpt0 := xcpt(i) + io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new 
FrontendExceptions) + io.inst(i).bits.replay := ic_replay(i) + io.inst(i).bits.rvc := false.B + + expand(i + 1, null, curInst >> 32) + } + } + + def shiftInsnLeft(in: UInt, dist: UInt) = { + val r = in.getWidth / coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) + data << (dist << log2Ceil(coreInstBits)) + } + + def shiftInsnRight(in: UInt, dist: UInt) = { + val r = in.getWidth / coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) + data >> (dist << log2Ceil(coreInstBits)) + } +} From cd105e7f2dfa0ca07c3cc787ff28f3ca723d7502 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 00:56:10 +0800 Subject: [PATCH 046/140] [rocketv] migrate IBuf --- rocketv/src/Bundle.scala | 32 +++++++++ rocketv/src/IBuf.scala | 140 +++++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 41 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index bdbf0ef36..afd0d557b 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -536,3 +536,35 @@ class ExpandedInstruction extends Bundle { val rs2 = UInt(5.W) val rs3 = UInt(5.W) } + +class FrontendResp( + vaddrBits: Int, + entries: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int], + vaddrBitsExtended: Int, + coreInstBits: Int) + extends Bundle { + def fetchWidth = 1 + val btb = new BTBResp(vaddrBits, entries, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) + val pc = UInt(vaddrBitsExtended.W) // ID stage PC + val data = UInt((fetchWidth * coreInstBits).W) + val mask = UInt(fetchWidth.W) + val xcpt = new FrontendExceptions + val replay = Bool() +} + +class FrontendExceptions extends Bundle { + val pf = Bool() + val gf = Bool() + val ae = Bool() +} + +class Instruction extends Bundle { + val xcpt0 = new FrontendExceptions // exceptions on first half of 
instruction + val xcpt1 = new FrontendExceptions // exceptions on second half of instruction + val replay = Bool() + val rvc = Bool() + val inst = new ExpandedInstruction + val raw = UInt(32.W) +} \ No newline at end of file diff --git a/rocketv/src/IBuf.scala b/rocketv/src/IBuf.scala index 3d70b1d57..781d2a94e 100644 --- a/rocketv/src/IBuf.scala +++ b/rocketv/src/IBuf.scala @@ -1,45 +1,100 @@ -// See LICENSE.SiFive for license details. - -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{log2Ceil, Cat, Decoupled, Fill, UIntToOH} -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ - -class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters { - val xcpt0 = new FrontendExceptions // exceptions on first half of instruction - val xcpt1 = new FrontendExceptions // exceptions on second half of instruction - val replay = Bool() - val rvc = Bool() - val inst = new ExpandedInstruction - val raw = UInt(32.W) - require(coreInstBits == (if (usingCompressed) 16 else 32)) +import chisel3.experimental.hierarchy.{Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object IBufParameter { + implicit def rwP: upickle.default.ReadWriter[IBufParameter] = upickle.default.macroRW[IBufParameter] } -/** handle Cext. */ -class IBuf(implicit p: Parameters) extends CoreModule { - val io = IO(new Bundle { - // 3. Frontend fetched data will input to here. - val imem = Flipped(Decoupled(new FrontendResp)) - val kill = Input(Bool()) - val pc = Output(UInt(vaddrBitsExtended.W)) - val btb_resp = Output(new BTBResp()) - // 4. Give out the instruction to Decode. 
- val inst = Vec(retireWidth, Decoupled(new Instruction)) - }) - - // This module is meant to be more general, but it's not there yet - require(decodeWidth == 1) +case class IBufParameter( + useAsyncReset: Boolean, + xLen: Int, + usingCompressed: Boolean, + vaddrBits: Int, + entries: Int, + // TODO: have a better way to calculate it, like what we did in the CSR... + vaddrBitsExtended: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int], + fetchWidth: Int + ) extends SerializableModuleParameter { + val retireWidth: Int = 1 + val coreInstBits: Int = if (usingCompressed) 16 else 32 + val coreInstBytes: Int = coreInstBits / 8 +} + +class IBufInterface(parameter: IBufParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val imem = Flipped( + Decoupled( + new FrontendResp( + parameter.vaddrBits, + parameter.entries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.vaddrBitsExtended, + parameter.coreInstBits, + parameter.fetchWidth + ) + ) + ) + val kill = Input(Bool()) + val pc = Output(UInt(parameter.vaddrBitsExtended.W)) + val btb_resp = Output( + new BTBResp( + parameter.vaddrBits, + parameter.entries, + parameter.fetchWidth, + parameter.bhtHistoryLength, + parameter.bhtCounterLength + ) + ) + // 4. Give out the instruction to Decode. 
+ val inst = Vec(parameter.retireWidth, Decoupled(new Instruction)) +} + +@instantiable +class IBuf(val parameter: IBufParameter) + extends FixedIORawModule(new IBufInterface(parameter)) + with SerializableModule[IBufParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val xLen = parameter.xLen + val fetchWidth = parameter.fetchWidth + val vaddrBits = parameter.vaddrBits + val entries = parameter.entries + val bhtHistoryLength = parameter.bhtHistoryLength + val bhtCounterLength = parameter.bhtCounterLength + val coreInstBytes = parameter.coreInstBytes + val vaddrBitsExtended = parameter.vaddrBitsExtended + val coreInstBits = parameter.coreInstBits + val retireWidth = parameter.retireWidth + val usingCompressed = parameter.usingCompressed val n = fetchWidth - 1 val nBufValid = if (n == 0) 0.U else RegInit(init = 0.U(log2Ceil(fetchWidth).W)) val buf = Reg(chiselTypeOf(io.imem.bits)) - val ibufBTBResp = Reg(new BTBResp) + val ibufBTBResp = Reg( + new BTBResp( + vaddrBits, + entries, + fetchWidth, + bhtHistoryLength, + bhtCounterLength + ) + ) val pcWordMask = (coreInstBytes * fetchWidth - 1).U(vaddrBitsExtended.W) - - val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth * coreInstBytes) - 1, log2Ceil(coreInstBytes)) + val pcWordBits = io.imem.bits.pc(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes)) val nReady = WireDefault(0.U(log2Ceil(fetchWidth + 1).W)) val nIC = Mux(io.imem.bits.btb.taken, io.imem.bits.btb.bridx +& 1.U, fetchWidth.U) - pcWordBits val nICReady = nReady - nBufValid @@ -48,7 +103,7 @@ class IBuf(implicit p: Parameters) extends CoreModule { if (n > 0) { when(io.inst(0).ready) { - nBufValid := Mux(nReady >== nBufValid, 0.U, nBufValid - nReady) + nBufValid := Mux((nReady >= nBufValid) || nBufValid === 0.U, 0.U, nBufValid - nReady) if (n > 1) when(nReady > 0.U && nReady < nBufValid) { val shiftedBuf = 
shiftInsnRight(buf.data(n * coreInstBits - 1, coreInstBits), (nReady - 1.U)(log2Ceil(n - 1) - 1, 0)) @@ -74,8 +129,10 @@ class IBuf(implicit p: Parameters) extends CoreModule { val icShiftAmt = (fetchWidth.U + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0) val icData = - shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits - 1, 0))), icShiftAmt) - .extract(3 * fetchWidth * coreInstBits - 1, 2 * fetchWidth * coreInstBits) + shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits - 1, 0))), icShiftAmt)( + 3 * fetchWidth * coreInstBits - 1, + 2 * fetchWidth * coreInstBits + ) val icMask = (~0.U((fetchWidth * coreInstBits).W) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth * coreInstBits - 1, 0) val inst = icData & icMask | buf.data & ~icMask @@ -92,7 +149,8 @@ class IBuf(implicit p: Parameters) extends CoreModule { expand(0, 0.U, inst) def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) { - val exp = Module(new RVCExpander) + // TODO: Dont instantiate it unless usingCompressed is true + val exp = Instantiate(new RVCExpander(RVCExpanderParameter(xLen, usingCompressed))) exp.io.in := curInst io.inst(i).bits.inst := exp.io.out io.inst(i).bits.raw := curInst @@ -101,8 +159,8 @@ class IBuf(implicit p: Parameters) extends CoreModule { val replay = ic_replay(j) || (!exp.io.rvc && ic_replay(j + 1.U)) val full_insn = exp.io.rvc || valid(j + 1.U) || buf_replay(j) io.inst(i).valid := valid(j) && full_insn - io.inst(i).bits.xcpt0 := xcpt(j) - io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j + 1.U).asUInt).asTypeOf(new FrontendExceptions) + io.inst(i).bits.xcpt0 := VecInit(xcpt)(j) + io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, VecInit(xcpt)(j + 1.U).asUInt).asTypeOf(new FrontendExceptions) io.inst(i).bits.replay := replay io.inst(i).bits.rvc := exp.io.rvc @@ -123,14 +181,14 @@ class IBuf(implicit p: Parameters) extends CoreModule { } } - def shiftInsnLeft(in: UInt, dist: UInt) = { + def 
shiftInsnLeft(in: UInt, dist: UInt): UInt = { val r = in.getWidth / coreInstBits require(in.getWidth % coreInstBits == 0) val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) data << (dist << log2Ceil(coreInstBits)) } - def shiftInsnRight(in: UInt, dist: UInt) = { + def shiftInsnRight(in: UInt, dist: UInt): UInt = { val r = in.getWidth / coreInstBits require(in.getWidth % coreInstBits == 0) val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r - 1) * coreInstBits), in) From 6a2cb827c25b61c616ec2b644248249de91de5b0 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 00:56:44 +0800 Subject: [PATCH 047/140] [rocketv] add elaborator for IBuf - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.IBuf design --useAsyncReset true --xLen 32 --usingCompressed true --vaddrBits 32 --entries 4 --vaddrBitsExtended 32 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.IBuf design --parameter ./IBuf.json --run-firtool --- elaborator/src/rocketv/IBuf.scala | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 elaborator/src/rocketv/IBuf.scala diff --git a/elaborator/src/rocketv/IBuf.scala b/elaborator/src/rocketv/IBuf.scala new file mode 100644 index 000000000..1e2ac17da --- /dev/null +++ b/elaborator/src/rocketv/IBuf.scala @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{IBuf, IBufParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object IBuf extends Elaborator { + @main + case class IBufParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "entries") entries: Int, + @arg(name = 
"vaddrBitsExtended") vaddrBitsExtended: Int, + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "fetchWidth") fetchWidth: Int) { + def convert: IBufParameter = IBufParameter( + useAsyncReset, + xLen, + usingCompressed, + vaddrBits, + entries, + vaddrBitsExtended, + bhtHistoryLength, + bhtCounterLength, + fetchWidth + ) + } + + implicit def IBufParameterMainParser: ParserForClass[IBufParameterMain] = ParserForClass[IBufParameterMain] + + @main + def config(@arg(name = "parameter") parameter: IBufParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[IBuf, IBufParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From b5d19f699f905e954d8989f5bca91f5fff4a8cdb Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 00:59:27 +0800 Subject: [PATCH 048/140] [rocketv] copy Multipler into rocketv project --- rocketv/src/Multiplier.scala | 219 +++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 rocketv/src/Multiplier.scala diff --git a/rocketv/src/Multiplier.scala b/rocketv/src/Multiplier.scala new file mode 100644 index 000000000..74b57821c --- /dev/null +++ b/rocketv/src/Multiplier.scala @@ -0,0 +1,219 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{log2Ceil, log2Floor, log2Up, Cat, Decoupled, Enum, Fill, Log2, Pipe, Valid} +import freechips.rocketchip.util._ +// TODO: remove it +import freechips.rocketchip.rocket.{DecodeLogic, MulDivParams} + +class MultiplierReq(dataBits: Int, tagBits: Int, aluFn: ALUFN = new ALUFN) extends Bundle { + val fn = Bits(aluFn.SZ_ALU_FN.W) + val dw = Bits(SZ_DW.W) + val in1 = Bits(dataBits.W) + val in2 = Bits(dataBits.W) + val tag = UInt(tagBits.W) +} + +class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { + val data = Bits(dataBits.W) + val tag = UInt(tagBits.W) +} + +class MultiplierIO(val dataBits: Int, val tagBits: Int, aluFn: ALUFN = new ALUFN) extends Bundle { + val req = Flipped(Decoupled(new MultiplierReq(dataBits, tagBits, aluFn))) + val kill = Input(Bool()) + val resp = Decoupled(new MultiplierResp(dataBits, tagBits)) +} + +class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new ALUFN) extends Module { + private def minDivLatency = (cfg.divUnroll > 0).option(if (cfg.divEarlyOut) 3 else 1 + w / cfg.divUnroll) + private def minMulLatency = (cfg.mulUnroll > 0).option(if (cfg.mulEarlyOut) 2 else w / cfg.mulUnroll) + def minLatency: Int = (minDivLatency ++ minMulLatency).min + + val io = IO(new MultiplierIO(width, log2Up(nXpr), aluFn)) + val w = io.req.bits.in1.getWidth + val mulw = if (cfg.mulUnroll == 0) w else (w + cfg.mulUnroll - 1) / cfg.mulUnroll * cfg.mulUnroll + val fastMulW = if (cfg.mulUnroll == 0) false else w / 2 > cfg.mulUnroll && w % (2 * cfg.mulUnroll) == 0 + + val s_ready :: s_neg_inputs :: s_mul :: s_div :: s_dummy :: s_neg_output :: s_done_mul :: s_done_div :: Nil = Enum(8) + val state = RegInit(s_ready) + + val req = Reg(chiselTypeOf(io.req.bits)) + val count = Reg( + UInt( + log2Ceil( + ((cfg.divUnroll != 0).option(w / cfg.divUnroll + 1).toSeq ++ + (cfg.mulUnroll != 0).option(mulw / cfg.mulUnroll)).reduce(_ max _) + ).W + ) + ) + val 
neg_out = Reg(Bool()) + val isHi = Reg(Bool()) + val resHi = Reg(Bool()) + val divisor = Reg(Bits((w + 1).W)) // div only needs w bits + val remainder = Reg(Bits((2 * mulw + 2).W)) // div only needs 2*w+1 bits + + val mulDecode = List( + aluFn.FN_MUL -> List(Y, N, X, X), + aluFn.FN_MULH -> List(Y, Y, Y, Y), + aluFn.FN_MULHU -> List(Y, Y, N, N), + aluFn.FN_MULHSU -> List(Y, Y, Y, N) + ) + val divDecode = List( + aluFn.FN_DIV -> List(N, N, Y, Y), + aluFn.FN_REM -> List(N, Y, Y, Y), + aluFn.FN_DIVU -> List(N, N, N, N), + aluFn.FN_REMU -> List(N, Y, N, N) + ) + val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil = + DecodeLogic( + io.req.bits.fn, + List(X, X, X, X), + (if (cfg.divUnroll != 0) divDecode else Nil) ++ (if (cfg.mulUnroll != 0) mulDecode else Nil) + ).map(_.asBool) + + require(w == 32 || w == 64) + def halfWidth(req: MultiplierReq) = (w > 32).B && req.dw === DW_32 + + def sext(x: Bits, halfW: Bool, signed: Bool) = { + val sign = signed && Mux(halfW, x(w / 2 - 1), x(w - 1)) + val hi = Mux(halfW, Fill(w / 2, sign), x(w - 1, w / 2)) + (Cat(hi, x(w / 2 - 1, 0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned) + + val subtractor = remainder(2 * w, w) - divisor + val result = Mux(resHi, remainder(2 * w, w + 1), remainder(w - 1, 0)) + val negated_remainder = -result + + if (cfg.divUnroll != 0) when(state === s_neg_inputs) { + when(remainder(w - 1)) { + remainder := negated_remainder + } + when(divisor(w - 1)) { + divisor := subtractor + } + state := s_div + } + if (cfg.divUnroll != 0) when(state === s_neg_output) { + remainder := negated_remainder + state := s_done_div + resHi := false.B + } + if (cfg.mulUnroll != 0) when(state === s_mul) { + val mulReg = Cat(remainder(2 * mulw + 1, w + 1), remainder(w - 1, 0)) + val mplierSign = remainder(w) + val mplier = mulReg(mulw - 1, 0) + val accum = mulReg(2 * mulw, mulw).asSInt + val mpcand = 
divisor.asSInt + val prod = Cat(mplierSign, mplier(cfg.mulUnroll - 1, 0)).asSInt * mpcand + accum + val nextMulReg = Cat(prod, mplier(mulw - 1, cfg.mulUnroll)) + val nextMplierSign = count === (mulw / cfg.mulUnroll - 2).U && neg_out + + val eOutMask = ((BigInt(-1) << mulw).S >> (count * cfg.mulUnroll.U)(log2Up(mulw) - 1, 0))(mulw - 1, 0) + val eOut = (cfg.mulEarlyOut).B && count =/= (mulw / cfg.mulUnroll - 1).U && count =/= 0.U && + !isHi && (mplier & ~eOutMask) === 0.U + val eOutRes = (mulReg >> (mulw.U - count * cfg.mulUnroll.U)(log2Up(mulw) - 1, 0)) + val nextMulReg1 = Cat(nextMulReg(2 * mulw, mulw), Mux(eOut, eOutRes, nextMulReg)(mulw - 1, 0)) + remainder := Cat(nextMulReg1 >> w, nextMplierSign, nextMulReg1(w - 1, 0)) + + count := count + 1.U + when(eOut || count === (mulw / cfg.mulUnroll - 1).U) { + state := s_done_mul + resHi := isHi + } + } + if (cfg.divUnroll != 0) when(state === s_div) { + val unrolls = ((0 until cfg.divUnroll) + .scanLeft(remainder)) { + case (rem, i) => + // the special case for iteration 0 is to save HW, not for correctness + val difference = if (i == 0) subtractor else rem(2 * w, w) - divisor(w - 1, 0) + val less = difference(w) + Cat(Mux(less, rem(2 * w - 1, w), difference(w - 1, 0)), rem(w - 1, 0), !less) + } + .tail + + remainder := unrolls.last + when(count === (w / cfg.divUnroll).U) { + state := Mux(neg_out, s_neg_output, s_done_div) + resHi := isHi + if (w % cfg.divUnroll < cfg.divUnroll - 1) + remainder := unrolls(w % cfg.divUnroll) + } + count := count + 1.U + + val divby0 = count === 0.U && !subtractor(w) + if (cfg.divEarlyOut) { + val align = 1 << log2Floor(cfg.divUnroll.max(cfg.divEarlyOutGranularity)) + val alignMask = ~((align - 1).U(log2Ceil(w).W)) + val divisorMSB = Log2(divisor(w - 1, 0), w) & alignMask + val dividendMSB = Log2(remainder(w - 1, 0), w) | ~alignMask + val eOutPos = ~(dividendMSB - divisorMSB) + val eOut = count === 0.U && !divby0 && eOutPos >= align.U + when(eOut) { + remainder := remainder(w - 1, 0) << 
eOutPos + count := eOutPos >> log2Floor(cfg.divUnroll) + } + } + when(divby0 && !isHi) { neg_out := false.B } + } + when(io.resp.fire || io.kill) { + state := s_ready + } + when(io.req.fire) { + state := Mux(cmdMul, s_mul, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div)) + isHi := cmdHi + resHi := false.B + count := (if (fastMulW) Mux[UInt](cmdMul && halfWidth(io.req.bits), (w / cfg.mulUnroll / 2).U, 0.U) else 0.U) + neg_out := Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign) + divisor := Cat(rhs_sign, rhs_in) + remainder := lhs_in + req := io.req.bits + } + + val outMul = (state & (s_done_mul ^ s_done_div)) === (s_done_mul & ~s_done_div) + val loOut = Mux(fastMulW.B && halfWidth(req) && outMul, result(w - 1, w / 2), result(w / 2 - 1, 0)) + val hiOut = Mux(halfWidth(req), Fill(w / 2, loOut(w / 2 - 1)), result(w - 1, w / 2)) + io.resp.bits.tag := req.tag + + io.resp.bits.data := Cat(hiOut, loOut) + io.resp.valid := (state === s_done_mul || state === s_done_div) + io.req.ready := state === s_ready +} + +class PipelinedMultiplier(width: Int, latency: Int, nXpr: Int = 32, aluFn: ALUFN = new ALUFN) + extends Module + with ShouldBeRetimed { + val io = IO(new Bundle { + val req = Flipped(Valid(new MultiplierReq(width, log2Ceil(nXpr), aluFn))) + val resp = Valid(new MultiplierResp(width, log2Ceil(nXpr))) + }) + + val in = Pipe(io.req) + + val decode = List( + aluFn.FN_MUL -> List(N, X, X), + aluFn.FN_MULH -> List(Y, Y, Y), + aluFn.FN_MULHU -> List(Y, N, N), + aluFn.FN_MULHSU -> List(Y, Y, N) + ) + val cmdHi :: lhsSigned :: rhsSigned :: Nil = + DecodeLogic(in.bits.fn, List(X, X, X), decode).map(_.asBool) + val cmdHalf = (width > 32).B && in.bits.dw === DW_32 + + val lhs = Cat(lhsSigned && in.bits.in1(width - 1), in.bits.in1).asSInt + val rhs = Cat(rhsSigned && in.bits.in2(width - 1), in.bits.in2).asSInt + val prod = lhs * rhs + val muxed = + Mux(cmdHi, prod(2 * width - 1, width), Mux(cmdHalf, prod(width / 2 - 1, 0).sextTo(width), prod(width - 1, 0))) + + val resp = Pipe(in, 
latency - 1) + io.resp.valid := resp.valid + io.resp.bits.tag := resp.bits.tag + io.resp.bits.data := Pipe(in.valid, muxed, latency - 1).bits +} From e8b7df36f8ecaf020fc9d8358d05b4bac14f910e Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 02:15:05 +0800 Subject: [PATCH 049/140] [rocketv] migrate MulDiv and PipelinedMultiplier --- rocketv/src/Bundle.scala | 16 +- .../src/{Multiplier.scala => MulDiv.scala} | 149 +++++++++--------- rocketv/src/PipelinedMultiplier.scala | 85 ++++++++++ 3 files changed, 175 insertions(+), 75 deletions(-) rename rocketv/src/{Multiplier.scala => MulDiv.scala} (61%) create mode 100644 rocketv/src/PipelinedMultiplier.scala diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index afd0d557b..9452eb46c 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -567,4 +567,18 @@ class Instruction extends Bundle { val rvc = Bool() val inst = new ExpandedInstruction val raw = UInt(32.W) -} \ No newline at end of file +} + +class MultiplierReq(dataBits: Int, tagBits: Int, uopWidth: Int) extends Bundle { + val fn = Bits(uopWidth.W) + val dw = Bool() + val in1 = Bits(dataBits.W) + val in2 = Bits(dataBits.W) + val tag = UInt(tagBits.W) +} + +class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { + val data = Bits(dataBits.W) + val full_data = Bits((2 * dataBits).W) + val tag = UInt(tagBits.W) +} diff --git a/rocketv/src/Multiplier.scala b/rocketv/src/MulDiv.scala similarity index 61% rename from rocketv/src/Multiplier.scala rename to rocketv/src/MulDiv.scala index 74b57821c..21eb121bf 100644 --- a/rocketv/src/Multiplier.scala +++ b/rocketv/src/MulDiv.scala @@ -1,39 +1,70 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{log2Ceil, log2Floor, log2Up, Cat, Decoupled, Enum, Fill, Log2, Pipe, Valid} -import freechips.rocketchip.util._ -// TODO: remove it -import freechips.rocketchip.rocket.{DecodeLogic, MulDivParams} - -class MultiplierReq(dataBits: Int, tagBits: Int, aluFn: ALUFN = new ALUFN) extends Bundle { - val fn = Bits(aluFn.SZ_ALU_FN.W) - val dw = Bits(SZ_DW.W) - val in1 = Bits(dataBits.W) - val in2 = Bits(dataBits.W) - val tag = UInt(tagBits.W) -} +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{BitPat, Cat, Decoupled, Enum, Fill, Log2, log2Ceil, log2Floor} -class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { - val data = Bits(dataBits.W) - val tag = UInt(tagBits.W) +object MulDivParameter { + implicit def rwP: upickle.default.ReadWriter[MulDivParameter] = upickle.default.macroRW[MulDivParameter] } -class MultiplierIO(val dataBits: Int, val tagBits: Int, aluFn: ALUFN = new ALUFN) extends Bundle { - val req = Flipped(Decoupled(new MultiplierReq(dataBits, tagBits, aluFn))) +case class MulDivParameter(useAsyncReset: Boolean, + latency: Int, + xLen: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean) + extends SerializableModuleParameter { + // optional to 16 when rve? 
+ val nXpr: Int = 32 + val uopWidth: Int = 4 + + def FN_MUL = 1.U(4.W) + def FN_MULH = 2.U(4.W) + def FN_MULHU = 3.U(4.W) + def FN_MULHSU = 4.U(4.W) + def FN_DIV = 4.U(4.W) + def FN_REM = 5.U(4.W) + def FN_DIVU = 6.U(4.W) + def FN_REMU = 7.U(4.W) + def DW_32 = false.B +} +class MulDivInterface(parameter: MulDivParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val req = Flipped(Decoupled(new MultiplierReq(parameter.xLen, log2Ceil(parameter.nXpr), parameter.uopWidth))) val kill = Input(Bool()) - val resp = Decoupled(new MultiplierResp(dataBits, tagBits)) + val resp = Decoupled(new MultiplierResp(parameter.xLen, log2Ceil(parameter.nXpr))) } -class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new ALUFN) extends Module { - private def minDivLatency = (cfg.divUnroll > 0).option(if (cfg.divEarlyOut) 3 else 1 + w / cfg.divUnroll) - private def minMulLatency = (cfg.mulUnroll > 0).option(if (cfg.mulEarlyOut) 2 else w / cfg.mulUnroll) - def minLatency: Int = (minDivLatency ++ minMulLatency).min +@instantiable +class MulDiv(val parameter: MulDivParameter) + extends FixedIORawModule(new MulDivInterface(parameter)) + with SerializableModule[MulDivParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + object cfg { + val divUnroll = parameter.divUnroll + val divEarlyOut = parameter.divEarlyOut + val divEarlyOutGranularity = parameter.divEarlyOutGranularity + val mulUnroll = parameter.mulUnroll + val mulEarlyOut = parameter.mulEarlyOut + } + + def N = BitPat.N() + def Y = BitPat.N() + def X = BitPat.dontCare(1) - val io = IO(new MultiplierIO(width, log2Up(nXpr), aluFn)) val w = io.req.bits.in1.getWidth val mulw = if (cfg.mulUnroll == 0) w else (w + cfg.mulUnroll - 1) / cfg.mulUnroll * cfg.mulUnroll val fastMulW = if (cfg.mulUnroll == 0) false else w / 2 > 
cfg.mulUnroll && w % (2 * cfg.mulUnroll) == 0 @@ -45,29 +76,30 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new A val count = Reg( UInt( log2Ceil( - ((cfg.divUnroll != 0).option(w / cfg.divUnroll + 1).toSeq ++ - (cfg.mulUnroll != 0).option(mulw / cfg.mulUnroll)).reduce(_ max _) + (Option.when(cfg.divUnroll != 0)(w / cfg.divUnroll + 1).toSeq ++ + Option.when(cfg.mulUnroll != 0)(mulw / cfg.mulUnroll)).reduce(_ max _) ).W ) ) val neg_out = Reg(Bool()) val isHi = Reg(Bool()) val resHi = Reg(Bool()) - val divisor = Reg(Bits((w + 1).W)) // div only needs w bits - val remainder = Reg(Bits((2 * mulw + 2).W)) // div only needs 2*w+1 bits + val divisor = Reg(UInt((w + 1).W)) // div only needs w bits + val remainder = Reg(UInt((2 * mulw + 2).W)) // div only needs 2*w+1 bits val mulDecode = List( - aluFn.FN_MUL -> List(Y, N, X, X), - aluFn.FN_MULH -> List(Y, Y, Y, Y), - aluFn.FN_MULHU -> List(Y, Y, N, N), - aluFn.FN_MULHSU -> List(Y, Y, Y, N) + parameter.FN_MUL -> List(Y, N, X, X), + parameter.FN_MULH -> List(Y, Y, Y, Y), + parameter.FN_MULHU -> List(Y, Y, N, N), + parameter.FN_MULHSU -> List(Y, Y, Y, N) ) val divDecode = List( - aluFn.FN_DIV -> List(N, N, Y, Y), - aluFn.FN_REM -> List(N, Y, Y, Y), - aluFn.FN_DIVU -> List(N, N, N, N), - aluFn.FN_REMU -> List(N, Y, N, N) + parameter.FN_DIV -> List(N, N, Y, Y), + parameter.FN_REM -> List(N, Y, Y, Y), + parameter.FN_DIVU -> List(N, N, N, N), + parameter.FN_REMU -> List(N, Y, N, N) ) + // TODO: move these decoding to Decoder. 
val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil = DecodeLogic( io.req.bits.fn, @@ -76,7 +108,7 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new A ).map(_.asBool) require(w == 32 || w == 64) - def halfWidth(req: MultiplierReq) = (w > 32).B && req.dw === DW_32 + def halfWidth(req: MultiplierReq) = (w > 32).B && req.dw === parameter.DW_32 def sext(x: Bits, halfW: Bool, signed: Bool) = { val sign = signed && Mux(halfW, x(w / 2 - 1), x(w - 1)) @@ -114,10 +146,10 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new A val nextMulReg = Cat(prod, mplier(mulw - 1, cfg.mulUnroll)) val nextMplierSign = count === (mulw / cfg.mulUnroll - 2).U && neg_out - val eOutMask = ((BigInt(-1) << mulw).S >> (count * cfg.mulUnroll.U)(log2Up(mulw) - 1, 0))(mulw - 1, 0) + val eOutMask = ((BigInt(-1) << mulw).S >> (count * cfg.mulUnroll.U)(log2Ceil(mulw) - 1, 0))(mulw - 1, 0) val eOut = (cfg.mulEarlyOut).B && count =/= (mulw / cfg.mulUnroll - 1).U && count =/= 0.U && !isHi && (mplier & ~eOutMask) === 0.U - val eOutRes = (mulReg >> (mulw.U - count * cfg.mulUnroll.U)(log2Up(mulw) - 1, 0)) + val eOutRes = (mulReg >> (mulw.U - count * cfg.mulUnroll.U)(log2Ceil(mulw) - 1, 0)) val nextMulReg1 = Cat(nextMulReg(2 * mulw, mulw), Mux(eOut, eOutRes, nextMulReg)(mulw - 1, 0)) remainder := Cat(nextMulReg1 >> w, nextMplierSign, nextMulReg1(w - 1, 0)) @@ -182,38 +214,7 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32, aluFn: ALUFN = new A io.resp.bits.tag := req.tag io.resp.bits.data := Cat(hiOut, loOut) + io.resp.bits.full_data := Cat(remainder(2*w, w+1), remainder(w-1, 0)) io.resp.valid := (state === s_done_mul || state === s_done_div) io.req.ready := state === s_ready } - -class PipelinedMultiplier(width: Int, latency: Int, nXpr: Int = 32, aluFn: ALUFN = new ALUFN) - extends Module - with ShouldBeRetimed { - val io = IO(new Bundle { - val req = Flipped(Valid(new MultiplierReq(width, log2Ceil(nXpr), aluFn))) - val resp = 
Valid(new MultiplierResp(width, log2Ceil(nXpr))) - }) - - val in = Pipe(io.req) - - val decode = List( - aluFn.FN_MUL -> List(N, X, X), - aluFn.FN_MULH -> List(Y, Y, Y), - aluFn.FN_MULHU -> List(Y, N, N), - aluFn.FN_MULHSU -> List(Y, Y, N) - ) - val cmdHi :: lhsSigned :: rhsSigned :: Nil = - DecodeLogic(in.bits.fn, List(X, X, X), decode).map(_.asBool) - val cmdHalf = (width > 32).B && in.bits.dw === DW_32 - - val lhs = Cat(lhsSigned && in.bits.in1(width - 1), in.bits.in1).asSInt - val rhs = Cat(rhsSigned && in.bits.in2(width - 1), in.bits.in2).asSInt - val prod = lhs * rhs - val muxed = - Mux(cmdHi, prod(2 * width - 1, width), Mux(cmdHalf, prod(width / 2 - 1, 0).sextTo(width), prod(width - 1, 0))) - - val resp = Pipe(in, latency - 1) - io.resp.valid := resp.valid - io.resp.bits.tag := resp.bits.tag - io.resp.bits.data := Pipe(in.valid, muxed, latency - 1).bits -} diff --git a/rocketv/src/PipelinedMultiplier.scala b/rocketv/src/PipelinedMultiplier.scala new file mode 100644 index 000000000..c4e03237b --- /dev/null +++ b/rocketv/src/PipelinedMultiplier.scala @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object PipelinedMultiplierParameter { + implicit def rwP: upickle.default.ReadWriter[PipelinedMultiplierParameter] = + upickle.default.macroRW[PipelinedMultiplierParameter] +} + +case class PipelinedMultiplierParameter( + useAsyncReset: Boolean, + latency: Int, + xLen: Int) + extends SerializableModuleParameter { + + val nXpr: Int = 32 + val uopWidth: Int = 4 + + def FN_MUL = 0.U(4.W) + def FN_MULH = 1.U(4.W) + def FN_MULHSU = 2.U(4.W) + def FN_MULHU = 3.U(4.W) + + def DW_32 
= false.B
+  def DW_64 = true.B
+}
+class PipelinedMultiplierInterface(parameter: PipelinedMultiplierParameter) extends Bundle {
+  val clock = Input(Clock())
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  val req = Flipped(Valid(new MultiplierReq(parameter.xLen, log2Ceil(parameter.nXpr), parameter.uopWidth)))
+  val resp = Valid(new MultiplierResp(parameter.xLen, log2Ceil(parameter.nXpr)))
+}
+
+@instantiable
+class PipelinedMultiplier(val parameter: PipelinedMultiplierParameter)
+    extends FixedIORawModule(new PipelinedMultiplierInterface(parameter))
+    with SerializableModule[PipelinedMultiplierParameter]
+    with ImplicitClock
+    with ImplicitReset {
+  override protected def implicitClock: Clock = io.clock
+  override protected def implicitReset: Reset = io.reset
+
+  val width = parameter.xLen
+  val latency = parameter.latency
+  def N = BitPat.N()
+  def Y = BitPat.Y()
+  def X = BitPat.dontCare(1)
+  def sextTo(x: UInt, n: Int): UInt = {
+    require(x.getWidth <= n)
+    if (x.getWidth == n) x
+    else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x)
+  }
+
+  val in = Pipe(io.req)
+
+  val decode = List(
+    parameter.FN_MUL -> List(N, X, X),
+    parameter.FN_MULH -> List(Y, Y, Y),
+    parameter.FN_MULHU -> List(Y, N, N),
+    parameter.FN_MULHSU -> List(Y, Y, N)
+  )
+  // TODO: move these decoding to Decoder. 
+ val cmdHi :: lhsSigned :: rhsSigned :: Nil = + DecodeLogic(in.bits.fn, List(X, X, X), decode).map(_.asBool) + val cmdHalf = (width > 32).B && in.bits.dw === parameter.DW_32 + + val lhs = Cat(lhsSigned && in.bits.in1(width - 1), in.bits.in1).asSInt + val rhs = Cat(rhsSigned && in.bits.in2(width - 1), in.bits.in2).asSInt + val prod = lhs * rhs + val muxed = + Mux(cmdHi, prod(2 * width - 1, width), Mux(cmdHalf, sextTo(prod(width / 2 - 1, 0), width), prod(width - 1, 0))) + + val resp = Pipe(in, latency - 1) + io.resp.valid := resp.valid + io.resp.bits.tag := resp.bits.tag + io.resp.bits.data := Pipe(in.valid, muxed, latency - 1).bits + io.resp.bits.full_data := Pipe(in.valid, prod, latency - 1).bits.asUInt +} From 8da1b9eeaa45742826fe64032ecdab7283578c83 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 02:15:31 +0800 Subject: [PATCH 050/140] [rocketv] add elaborator for MulDiv and PipelinedMultiplier - MulDiv - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.MulDiv config --useAsyncReset true --latency 2 --width 32 --divUnroll 1 --divEarlyOut false --divEarlyOutGranularity 1 --mulUnroll 1 --mulEarlyOut false - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.MulDiv design --parameter ./MulDiv.json --run-firtool - PipelinedMultiplier - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PipelinedMultiplier config --useAsyncReset true --latency 2 --width 32 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PipelinedMultiplier design --parameter ./PipelinedMultiplier.json --run-firtool --- elaborator/src/rocketv/MulDiv.scala | 42 +++++++++++++++++++ .../src/rocketv/PipelinedMultiplier.scala | 32 ++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 elaborator/src/rocketv/MulDiv.scala create mode 100644 elaborator/src/rocketv/PipelinedMultiplier.scala diff --git 
a/elaborator/src/rocketv/MulDiv.scala b/elaborator/src/rocketv/MulDiv.scala new file mode 100644 index 000000000..8b2814490 --- /dev/null +++ b/elaborator/src/rocketv/MulDiv.scala @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{MulDiv, MulDivParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object MulDiv extends Elaborator { + @main + case class MulDivParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "width") width: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean) { + def convert: MulDivParameter = MulDivParameter( + useAsyncReset, + latency, + width, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut + ) + } + + implicit def MulDivParameterMainParser: ParserForClass[MulDivParameterMain] = ParserForClass[MulDivParameterMain] + + @main + def config(@arg(name = "parameter") parameter: MulDivParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[MulDiv, MulDivParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/PipelinedMultiplier.scala b/elaborator/src/rocketv/PipelinedMultiplier.scala new file mode 100644 index 000000000..9ef2cb47b --- /dev/null +++ b/elaborator/src/rocketv/PipelinedMultiplier.scala @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + 
+import mainargs._ +import org.chipsalliance.rocketv.{PipelinedMultiplier, PipelinedMultiplierParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PipelinedMultiplier extends Elaborator { + @main + case class PipelinedMultiplierParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "width") width: Int) { + def convert: PipelinedMultiplierParameter = PipelinedMultiplierParameter( + useAsyncReset: Boolean, + latency: Int, + width: Int + ) + } + + implicit def PipelinedMultiplierParameterMainParser: ParserForClass[PipelinedMultiplierParameterMain] = ParserForClass[PipelinedMultiplierParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PipelinedMultiplierParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PipelinedMultiplier, PipelinedMultiplierParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 521d45966a665f83125900e71648e8c7644c9d4c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 02:42:20 +0800 Subject: [PATCH 051/140] [rocketv] copy PMA into rocketv project --- rocketv/src/PMA.scala | 50 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 rocketv/src/PMA.scala diff --git a/rocketv/src/PMA.scala b/rocketv/src/PMA.scala new file mode 100644 index 000000000..5ebbaa6fd --- /dev/null +++ b/rocketv/src/PMA.scala @@ -0,0 +1,50 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. 
+ +package freechips.rocketchip.rocket + +import chisel3._ +import chisel3.util._ + +import org.chipsalliance.cde.config.{Field, Parameters} +import freechips.rocketchip.subsystem.CacheBlockBytes +import freechips.rocketchip.diplomacy.RegionType +import freechips.rocketchip.tile.{CoreModule, CoreBundle} +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property +import freechips.rocketchip.devices.debug.DebugModuleKey +import chisel3.experimental.SourceInfo + +class PMAChecker(manager: TLSlavePortParameters)(implicit p: Parameters) extends CoreModule()(p) { + val io = IO(new Bundle { + val paddr = Input(UInt()) + + val resp = Output(new Bundle { + val cacheable = Bool() + val r = Bool() + val w = Bool() + val pp = Bool() + val al = Bool() + val aa = Bool() + val x = Bool() + val eff = Bool() + }) + }) + + // PMA + // check exist a slave can consume this address. + val legal_address = manager.findSafe(io.paddr).reduce(_||_) + // check utility to help check SoC property. 
+ def fastCheck(member: TLManagerParameters => Boolean) = + legal_address && manager.fastProperty(io.paddr, member, (b:Boolean) => b.B) + + io.resp.cacheable := fastCheck(_.supportsAcquireB) + io.resp.r := fastCheck(_.supportsGet) + io.resp.w := fastCheck(_.supportsPutFull) + io.resp.pp := fastCheck(_.supportsPutPartial) + io.resp.al := fastCheck(_.supportsLogical) + io.resp.aa := fastCheck(_.supportsArithmetic) + io.resp.x := fastCheck(_.executable) + io.resp.eff := fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType) +} From 9d89fbbed373886b61828a98f2eac873c53d03ef Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:17:55 +0800 Subject: [PATCH 052/140] [rocketv] migrate PMAChecker --- rocketv/src/Bundle.scala | 11 +++++ rocketv/src/PMA.scala | 88 +++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 9452eb46c..46b4ea418 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -582,3 +582,14 @@ class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { val full_data = Bits((2 * dataBits).W) val tag = UInt(tagBits.W) } + +class PMACheckerResponse extends Bundle { + val cacheable = Bool() + val r = Bool() + val w = Bool() + val pp = Bool() + val al = Bool() + val aa = Bool() + val x = Bool() + val eff = Bool() +} \ No newline at end of file diff --git a/rocketv/src/PMA.scala b/rocketv/src/PMA.scala index 5ebbaa6fd..7c3ae51f9 100644 --- a/rocketv/src/PMA.scala +++ b/rocketv/src/PMA.scala @@ -1,50 +1,54 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. 
- -package freechips.rocketchip.rocket +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.experimental.BitSet -import org.chipsalliance.cde.config.{Field, Parameters} -import freechips.rocketchip.subsystem.CacheBlockBytes -import freechips.rocketchip.diplomacy.RegionType -import freechips.rocketchip.tile.{CoreModule, CoreBundle} -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property -import freechips.rocketchip.devices.debug.DebugModuleKey -import chisel3.experimental.SourceInfo +object PMACheckerParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if(str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + implicit def rwP: upickle.default.ReadWriter[PMACheckerParameter] = upickle.default.macroRW[PMACheckerParameter] +} -class PMAChecker(manager: TLSlavePortParameters)(implicit p: Parameters) extends CoreModule()(p) { - val io = IO(new Bundle { - val paddr = Input(UInt()) +case class PMACheckerParameter( + paddrBits: Int, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet) + extends SerializableModuleParameter - val resp = Output(new Bundle { - val cacheable = Bool() - val r = Bool() - val w = Bool() - val pp = Bool() - val al = Bool() - val aa = Bool() - val x = Bool() - val eff = Bool() - }) - }) +class PMACheckerInterface(parameter: PMACheckerParameter) 
extends Bundle { + val paddr = Input(UInt(parameter.paddrBits.W)) + val resp = Output(new PMACheckerResponse) +} - // PMA +@instantiable +class PMAChecker(val parameter: PMACheckerParameter) + extends FixedIORawModule(new PMACheckerInterface(parameter)) + with SerializableModule[PMACheckerParameter] { // check exist a slave can consume this address. - val legal_address = manager.findSafe(io.paddr).reduce(_||_) - // check utility to help check SoC property. - def fastCheck(member: TLManagerParameters => Boolean) = - legal_address && manager.fastProperty(io.paddr, member, (b:Boolean) => b.B) - - io.resp.cacheable := fastCheck(_.supportsAcquireB) - io.resp.r := fastCheck(_.supportsGet) - io.resp.w := fastCheck(_.supportsPutFull) - io.resp.pp := fastCheck(_.supportsPutPartial) - io.resp.al := fastCheck(_.supportsLogical) - io.resp.aa := fastCheck(_.supportsArithmetic) - io.resp.x := fastCheck(_.executable) - io.resp.eff := fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType) + val legal_address = parameter.legal.matches(io.paddr) + io.resp.cacheable := legal_address && (if(parameter.cacheable.isEmpty) false.B else parameter.cacheable.matches(io.paddr)) + io.resp.r := legal_address && (if(parameter.read.isEmpty) false.B else parameter.read.matches(io.paddr)) + io.resp.w := legal_address && (if(parameter.write.isEmpty) false.B else parameter.write.matches(io.paddr)) + io.resp.pp := legal_address && (if(parameter.putPartial.isEmpty) false.B else parameter.putPartial.matches(io.paddr)) + io.resp.al := legal_address && (if(parameter.logic.isEmpty) false.B else parameter.logic.matches(io.paddr)) + io.resp.aa := legal_address && (if(parameter.arithmetic.isEmpty) false.B else parameter.arithmetic.matches(io.paddr)) + io.resp.x := legal_address && (if(parameter.exec.isEmpty) false.B else parameter.exec.matches(io.paddr)) + io.resp.eff := legal_address && (if(parameter.sideEffects.isEmpty) false.B else parameter.sideEffects.matches(io.paddr)) } 
From 463c58dc6d0024777361ed9e01d4dfbeef1a3f68 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:18:05 +0800 Subject: [PATCH 053/140] [rocketv] add elaborator for PMAChecker - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PMAChecker config --paddrBits 32 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --exec 80000000-ffffffff --sideEffects 00000000-3fffffff - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PMAChecker design --parameter ./PMAChecker.json --run-firtool --- elaborator/src/rocketv/PMAChecker.scala | 72 +++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 elaborator/src/rocketv/PMAChecker.scala diff --git a/elaborator/src/rocketv/PMAChecker.scala b/elaborator/src/rocketv/PMAChecker.scala new file mode 100644 index 000000000..571acd126 --- /dev/null +++ b/elaborator/src/rocketv/PMAChecker.scala @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{PMAChecker, PMACheckerParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PMAChecker extends Elaborator { + + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right(strs.head.split(",").map{ opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + 
BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + }.reduce(_.union(_))) + } + } + @main + case class PMACheckerParameterMain( + paddrBits: Int, + legal: Seq[BitSet], + cacheable: Seq[BitSet], + read: Seq[BitSet], + write: Seq[BitSet], + putPartial: Seq[BitSet], + logic: Seq[BitSet], + arithmetic: Seq[BitSet], + exec: Seq[BitSet], + sideEffects: Seq[BitSet]) { + def convert: PMACheckerParameter = PMACheckerParameter( + paddrBits, + legal.foldLeft(BitSet.empty)(_.union(_)), + cacheable.foldLeft(BitSet.empty)(_.union(_)), + read.foldLeft(BitSet.empty)(_.union(_)), + write.foldLeft(BitSet.empty)(_.union(_)), + putPartial.foldLeft(BitSet.empty)(_.union(_)), + logic.foldLeft(BitSet.empty)(_.union(_)), + arithmetic.foldLeft(BitSet.empty)(_.union(_)), + exec.foldLeft(BitSet.empty)(_.union(_)), + sideEffects.foldLeft(BitSet.empty)(_.union(_)) + ) + } + + implicit def PMACheckerParameterMainParser: ParserForClass[PMACheckerParameterMain] = + ParserForClass[PMACheckerParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PMACheckerParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PMAChecker, PMACheckerParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From c605e0ccb441741567e9badb1fd0ed6634a32a08 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:32:28 +0800 Subject: [PATCH 054/140] [rocketv] copy PMP into rocketv project --- rocketv/src/PMP.scala | 219 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 rocketv/src/PMP.scala diff --git a/rocketv/src/PMP.scala b/rocketv/src/PMP.scala new file mode 100644 index 000000000..a38449015 --- /dev/null +++ b/rocketv/src/PMP.scala @@ -0,0 +1,219 @@ +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{log2Ceil, Cat} +import org.chipsalliance.cde.config._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.util._ + +class PMPConfig extends Bundle { + val l = Bool() + val res = UInt(2.W) + val a = UInt(2.W) + val x = Bool() + val w = Bool() + val r = Bool() +} + +object PMP { + def lgAlign = 2 + + def apply(reg: PMPReg): PMP = { + val pmp = Wire(new PMP()(reg.p)) + pmp.cfg := reg.cfg + pmp.addr := reg.addr + pmp.mask := pmp.computeMask + pmp + } +} + +class PMPReg(implicit p: Parameters) extends CoreBundle()(p) { + val cfg = new PMPConfig + val addr = UInt((paddrBits - PMP.lgAlign).W) + + def reset(): Unit = { + cfg.a := 0.U + cfg.l := 0.U + } + + def readAddr = if (pmpGranularity.log2 == PMP.lgAlign) addr + else { + val mask = ((BigInt(1) << (pmpGranularity.log2 - PMP.lgAlign)) - 1).U + Mux(napot, addr | (mask >> 1), ~(~addr | mask)) + } + def napot = cfg.a(1) + def torNotNAPOT = cfg.a(0) + def tor = !napot && torNotNAPOT + def cfgLocked = cfg.l + def addrLocked(next: PMPReg) = cfgLocked || next.cfgLocked && next.tor +} + +class PMP(implicit p: Parameters) extends PMPReg { + val mask = UInt(paddrBits.W) + + import PMP._ + def computeMask = { + val base = Cat(addr, cfg.a(0)) | ((pmpGranularity - 1).U >> lgAlign) + Cat(base & ~(base + 1.U), ((1 << lgAlign) - 1).U) + } + private def comparand = ~(~(addr << lgAlign) | (pmpGranularity - 1).U) + + private def pow2Match(x: UInt, lgSize: UInt, lgMaxSize: Int) = { + def eval(a: UInt, b: UInt, m: UInt) = ((a ^ b) & ~m) === 0.U + if (lgMaxSize <= pmpGranularity.log2) { + eval(x, comparand, mask) + } else { + // break up the circuit; the MSB part will be CSE'd + val lsbMask = mask | UIntToOH1(lgSize, lgMaxSize) + val msbMatch = eval(x >> lgMaxSize, comparand >> lgMaxSize, mask >> lgMaxSize) + val lsbMatch = eval(x(lgMaxSize - 1, 0), comparand(lgMaxSize - 1, 0), lsbMask(lgMaxSize - 1, 0)) + msbMatch && lsbMatch + } + } + + 
private def boundMatch(x: UInt, lsbMask: UInt, lgMaxSize: Int) = { + if (lgMaxSize <= pmpGranularity.log2) { + x < comparand + } else { + // break up the circuit; the MSB part will be CSE'd + val msbsLess = (x >> lgMaxSize) < (comparand >> lgMaxSize) + val msbsEqual = ((x >> lgMaxSize) ^ (comparand >> lgMaxSize)) === 0.U + val lsbsLess = (x(lgMaxSize - 1, 0) | lsbMask) < comparand(lgMaxSize - 1, 0) + msbsLess || (msbsEqual && lsbsLess) + } + } + + private def lowerBoundMatch(x: UInt, lgSize: UInt, lgMaxSize: Int) = + !boundMatch(x, UIntToOH1(lgSize, lgMaxSize), lgMaxSize) + + private def upperBoundMatch(x: UInt, lgMaxSize: Int) = + boundMatch(x, 0.U, lgMaxSize) + + private def rangeMatch(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP) = + prev.lowerBoundMatch(x, lgSize, lgMaxSize) && upperBoundMatch(x, lgMaxSize) + + private def pow2Homogeneous(x: UInt, pgLevel: UInt) = { + val maskHomogeneous = pgLevelMap { idxBits => if (idxBits > paddrBits) false.B else mask(idxBits - 1) }(pgLevel) + maskHomogeneous || (pgLevelMap { idxBits => ((x ^ comparand) >> idxBits) =/= 0.U }(pgLevel)) + } + + private def pgLevelMap[T](f: Int => T) = (0 until pgLevels).map { i => + f(pgIdxBits + (pgLevels - 1 - i) * pgLevelBits) + } + + private def rangeHomogeneous(x: UInt, pgLevel: UInt, prev: PMP) = { + val beginsAfterLower = !(x < prev.comparand) + val beginsAfterUpper = !(x < comparand) + + val pgMask = pgLevelMap { idxBits => (((BigInt(1) << paddrBits) - (BigInt(1) << idxBits)).max(0)).U }(pgLevel) + val endsBeforeLower = (x & pgMask) < (prev.comparand & pgMask) + val endsBeforeUpper = (x & pgMask) < (comparand & pgMask) + + endsBeforeLower || beginsAfterUpper || (beginsAfterLower && endsBeforeUpper) + } + + // returns whether this PMP completely contains, or contains none of, a page + def homogeneous(x: UInt, pgLevel: UInt, prev: PMP): Bool = + Mux(napot, pow2Homogeneous(x, pgLevel), !torNotNAPOT || rangeHomogeneous(x, pgLevel, prev)) + + // returns whether this matching PMP fully 
contains the access + def aligned(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP): Bool = if (lgMaxSize <= pmpGranularity.log2) true.B + else { + val lsbMask = UIntToOH1(lgSize, lgMaxSize) + val straddlesLowerBound = + ((x >> lgMaxSize) ^ (prev.comparand >> lgMaxSize)) === 0.U && (prev.comparand(lgMaxSize - 1, 0) & ~x( + lgMaxSize - 1, + 0 + )) =/= 0.U + val straddlesUpperBound = ((x >> lgMaxSize) ^ (comparand >> lgMaxSize)) === 0.U && (comparand( + lgMaxSize - 1, + 0 + ) & (x(lgMaxSize - 1, 0) | lsbMask)) =/= 0.U + val rangeAligned = !(straddlesLowerBound || straddlesUpperBound) + val pow2Aligned = (lsbMask & ~mask(lgMaxSize - 1, 0)) === 0.U + Mux(napot, pow2Aligned, rangeAligned) + } + + // returns whether this PMP matches at least one byte of the access + def hit(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP): Bool = + Mux(napot, pow2Match(x, lgSize, lgMaxSize), torNotNAPOT && rangeMatch(x, lgSize, lgMaxSize, prev)) +} + +class PMPHomogeneityChecker(pmps: Seq[PMP])(implicit p: Parameters) { + def apply(addr: UInt, pgLevel: UInt): Bool = { + pmps + .foldLeft((true.B, 0.U.asTypeOf(new PMP))) { + case ((h, prev), pmp) => + (h && pmp.homogeneous(addr, pgLevel, prev), pmp) + } + ._1 + } +} + +class PMPChecker(lgMaxSize: Int)(implicit val p: Parameters) extends Module with HasCoreParameters { + val io = IO(new Bundle { + val prv = Input(UInt(PRV.SZ.W)) + val pmp = Input(Vec(nPMPs, new PMP)) + val addr = Input(UInt(paddrBits.W)) + val size = Input(UInt(log2Ceil(lgMaxSize + 1).W)) + val r = Output(Bool()) + val w = Output(Bool()) + val x = Output(Bool()) + }) + + val default = if (io.pmp.isEmpty) true.B else io.prv > PRV.S.U + val pmp0 = WireInit(0.U.asTypeOf(new PMP)) + pmp0.cfg.r := default + pmp0.cfg.w := default + pmp0.cfg.x := default + + val res = (io.pmp.zip(pmp0 +: io.pmp)).reverse.foldLeft(pmp0) { + case (prev, (pmp, prevPMP)) => + val hit = pmp.hit(io.addr, io.size, lgMaxSize, prevPMP) + val ignore = default && !pmp.cfg.l + val aligned = 
pmp.aligned(io.addr, io.size, lgMaxSize, prevPMP) + + for ( + (name, idx) <- Seq("no", "TOR", if (pmpGranularity <= 4) "NA4" else "", "NAPOT").zipWithIndex; if name.nonEmpty + ) + property + .cover(pmp.cfg.a === idx.U, s"The cfg access is set to ${name} access ", "Cover PMP access mode setting") + + property.cover(pmp.cfg.l === 0x1.U, s"The cfg lock is set to high ", "Cover PMP lock mode setting") + + // Not including Write and no Read permission as the combination is reserved + for ((name, idx) <- Seq("no", "RO", "", "RW", "X", "RX", "", "RWX").zipWithIndex; if name.nonEmpty) + property.cover( + (Cat(pmp.cfg.x, pmp.cfg.w, pmp.cfg.r) === idx.U), + s"The permission is set to ${name} access ", + "Cover PMP access permission setting" + ) + + for ( + (name, idx) <- Seq("", "TOR", if (pmpGranularity <= 4) "NA4" else "", "NAPOT").zipWithIndex; if name.nonEmpty + ) { + property.cover( + !ignore && hit && aligned && pmp.cfg.a === idx.U, + s"The access matches ${name} mode ", + "Cover PMP access" + ) + property.cover( + pmp.cfg.l && hit && aligned && pmp.cfg.a === idx.U, + s"The access matches ${name} mode with lock bit high", + "Cover PMP access with lock bit" + ) + } + + val cur = WireInit(pmp) + cur.cfg.r := aligned && (pmp.cfg.r || ignore) + cur.cfg.w := aligned && (pmp.cfg.w || ignore) + cur.cfg.x := aligned && (pmp.cfg.x || ignore) + Mux(hit, cur, prev) + } + + io.r := res.cfg.r + io.w := res.cfg.w + io.x := res.cfg.x +} From 82552d1751cb89ff02dd5b210dbed7b42a8062da Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:44:08 +0800 Subject: [PATCH 055/140] [rocketv] migrate PMPChecker --- rocketv/src/PMP.scala | 229 +++++++----------------------------------- 1 file changed, 37 insertions(+), 192 deletions(-) diff --git a/rocketv/src/PMP.scala b/rocketv/src/PMP.scala index a38449015..be454eae4 100644 --- a/rocketv/src/PMP.scala +++ b/rocketv/src/PMP.scala @@ -1,211 +1,56 @@ -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{log2Ceil, Cat} -import org.chipsalliance.cde.config._ -import freechips.rocketchip.tile._ -import freechips.rocketchip.util._ - -class PMPConfig extends Bundle { - val l = Bool() - val res = UInt(2.W) - val a = UInt(2.W) - val x = Bool() - val w = Bool() - val r = Bool() -} +import chisel3.experimental.hierarchy.{instantiable, public} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.log2Ceil -object PMP { - def lgAlign = 2 - - def apply(reg: PMPReg): PMP = { - val pmp = Wire(new PMP()(reg.p)) - pmp.cfg := reg.cfg - pmp.addr := reg.addr - pmp.mask := pmp.computeMask - pmp - } +object PMPCheckerParameter { + implicit def rwP: upickle.default.ReadWriter[PMPCheckerParameter] = upickle.default.macroRW[PMPCheckerParameter] } -class PMPReg(implicit p: Parameters) extends CoreBundle()(p) { - val cfg = new PMPConfig - val addr = UInt((paddrBits - PMP.lgAlign).W) - - def reset(): Unit = { - cfg.a := 0.U - cfg.l := 0.U - } - - def readAddr = if (pmpGranularity.log2 == PMP.lgAlign) addr - else { - val mask = ((BigInt(1) << (pmpGranularity.log2 - PMP.lgAlign)) - 1).U - Mux(napot, addr | (mask >> 1), ~(~addr | mask)) - } - def napot = cfg.a(1) - def torNotNAPOT = cfg.a(0) - def tor = !napot && torNotNAPOT - def cfgLocked = cfg.l - def addrLocked(next: PMPReg) = cfgLocked || next.cfgLocked && next.tor +case class PMPCheckerParameter( + nPMPs: Int, + paddrBits: Int, + // @todo: log2Ceil(coreDataBytes)? 
+ lgMaxSize: Int, + pmpGranularity: Int) + extends SerializableModuleParameter + +class PMPCheckerInterface(parameter: PMPCheckerParameter) extends Bundle { + val prv = Input(UInt(PRV.SZ.W)) + val pmp = Input(Vec(parameter.nPMPs, new PMP(parameter.paddrBits))) + val addr = Input(UInt(parameter.paddrBits.W)) + val size = Input(UInt(log2Ceil(parameter.lgMaxSize + 1).W)) + val r = Output(Bool()) + val w = Output(Bool()) + val x = Output(Bool()) } -class PMP(implicit p: Parameters) extends PMPReg { - val mask = UInt(paddrBits.W) - - import PMP._ - def computeMask = { - val base = Cat(addr, cfg.a(0)) | ((pmpGranularity - 1).U >> lgAlign) - Cat(base & ~(base + 1.U), ((1 << lgAlign) - 1).U) - } - private def comparand = ~(~(addr << lgAlign) | (pmpGranularity - 1).U) - - private def pow2Match(x: UInt, lgSize: UInt, lgMaxSize: Int) = { - def eval(a: UInt, b: UInt, m: UInt) = ((a ^ b) & ~m) === 0.U - if (lgMaxSize <= pmpGranularity.log2) { - eval(x, comparand, mask) - } else { - // break up the circuit; the MSB part will be CSE'd - val lsbMask = mask | UIntToOH1(lgSize, lgMaxSize) - val msbMatch = eval(x >> lgMaxSize, comparand >> lgMaxSize, mask >> lgMaxSize) - val lsbMatch = eval(x(lgMaxSize - 1, 0), comparand(lgMaxSize - 1, 0), lsbMask(lgMaxSize - 1, 0)) - msbMatch && lsbMatch - } - } - - private def boundMatch(x: UInt, lsbMask: UInt, lgMaxSize: Int) = { - if (lgMaxSize <= pmpGranularity.log2) { - x < comparand - } else { - // break up the circuit; the MSB part will be CSE'd - val msbsLess = (x >> lgMaxSize) < (comparand >> lgMaxSize) - val msbsEqual = ((x >> lgMaxSize) ^ (comparand >> lgMaxSize)) === 0.U - val lsbsLess = (x(lgMaxSize - 1, 0) | lsbMask) < comparand(lgMaxSize - 1, 0) - msbsLess || (msbsEqual && lsbsLess) - } - } +@instantiable +class PMPChecker(val parameter: PMPCheckerParameter) + extends FixedIORawModule(new PMPCheckerInterface(parameter)) + with SerializableModule[PMPCheckerParameter] { - private def lowerBoundMatch(x: UInt, lgSize: UInt, lgMaxSize: 
Int) = - !boundMatch(x, UIntToOH1(lgSize, lgMaxSize), lgMaxSize) - - private def upperBoundMatch(x: UInt, lgMaxSize: Int) = - boundMatch(x, 0.U, lgMaxSize) - - private def rangeMatch(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP) = - prev.lowerBoundMatch(x, lgSize, lgMaxSize) && upperBoundMatch(x, lgMaxSize) - - private def pow2Homogeneous(x: UInt, pgLevel: UInt) = { - val maskHomogeneous = pgLevelMap { idxBits => if (idxBits > paddrBits) false.B else mask(idxBits - 1) }(pgLevel) - maskHomogeneous || (pgLevelMap { idxBits => ((x ^ comparand) >> idxBits) =/= 0.U }(pgLevel)) - } - - private def pgLevelMap[T](f: Int => T) = (0 until pgLevels).map { i => - f(pgIdxBits + (pgLevels - 1 - i) * pgLevelBits) - } - - private def rangeHomogeneous(x: UInt, pgLevel: UInt, prev: PMP) = { - val beginsAfterLower = !(x < prev.comparand) - val beginsAfterUpper = !(x < comparand) - - val pgMask = pgLevelMap { idxBits => (((BigInt(1) << paddrBits) - (BigInt(1) << idxBits)).max(0)).U }(pgLevel) - val endsBeforeLower = (x & pgMask) < (prev.comparand & pgMask) - val endsBeforeUpper = (x & pgMask) < (comparand & pgMask) - - endsBeforeLower || beginsAfterUpper || (beginsAfterLower && endsBeforeUpper) - } - - // returns whether this PMP completely contains, or contains none of, a page - def homogeneous(x: UInt, pgLevel: UInt, prev: PMP): Bool = - Mux(napot, pow2Homogeneous(x, pgLevel), !torNotNAPOT || rangeHomogeneous(x, pgLevel, prev)) - - // returns whether this matching PMP fully contains the access - def aligned(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP): Bool = if (lgMaxSize <= pmpGranularity.log2) true.B - else { - val lsbMask = UIntToOH1(lgSize, lgMaxSize) - val straddlesLowerBound = - ((x >> lgMaxSize) ^ (prev.comparand >> lgMaxSize)) === 0.U && (prev.comparand(lgMaxSize - 1, 0) & ~x( - lgMaxSize - 1, - 0 - )) =/= 0.U - val straddlesUpperBound = ((x >> lgMaxSize) ^ (comparand >> lgMaxSize)) === 0.U && (comparand( - lgMaxSize - 1, - 0 - ) & (x(lgMaxSize - 1, 0) | lsbMask)) 
=/= 0.U - val rangeAligned = !(straddlesLowerBound || straddlesUpperBound) - val pow2Aligned = (lsbMask & ~mask(lgMaxSize - 1, 0)) === 0.U - Mux(napot, pow2Aligned, rangeAligned) - } - - // returns whether this PMP matches at least one byte of the access - def hit(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP): Bool = - Mux(napot, pow2Match(x, lgSize, lgMaxSize), torNotNAPOT && rangeMatch(x, lgSize, lgMaxSize, prev)) -} - -class PMPHomogeneityChecker(pmps: Seq[PMP])(implicit p: Parameters) { - def apply(addr: UInt, pgLevel: UInt): Bool = { - pmps - .foldLeft((true.B, 0.U.asTypeOf(new PMP))) { - case ((h, prev), pmp) => - (h && pmp.homogeneous(addr, pgLevel, prev), pmp) - } - ._1 - } -} - -class PMPChecker(lgMaxSize: Int)(implicit val p: Parameters) extends Module with HasCoreParameters { - val io = IO(new Bundle { - val prv = Input(UInt(PRV.SZ.W)) - val pmp = Input(Vec(nPMPs, new PMP)) - val addr = Input(UInt(paddrBits.W)) - val size = Input(UInt(log2Ceil(lgMaxSize + 1).W)) - val r = Output(Bool()) - val w = Output(Bool()) - val x = Output(Bool()) - }) + val paddrBits = parameter.paddrBits + val pmpGranularity = parameter.pmpGranularity + val lgMaxSize = parameter.lgMaxSize val default = if (io.pmp.isEmpty) true.B else io.prv > PRV.S.U - val pmp0 = WireInit(0.U.asTypeOf(new PMP)) + val pmp0 = WireInit(0.U.asTypeOf(new PMP(paddrBits))) pmp0.cfg.r := default pmp0.cfg.w := default pmp0.cfg.x := default - val res = (io.pmp.zip(pmp0 +: io.pmp)).reverse.foldLeft(pmp0) { + val res = io.pmp.zip(pmp0 +: io.pmp).reverse.foldLeft(pmp0) { case (prev, (pmp, prevPMP)) => - val hit = pmp.hit(io.addr, io.size, lgMaxSize, prevPMP) + val hit = PMP.hit(pmp, io.addr, io.size, lgMaxSize, prevPMP, pmpGranularity) val ignore = default && !pmp.cfg.l - val aligned = pmp.aligned(io.addr, io.size, lgMaxSize, prevPMP) - - for ( - (name, idx) <- Seq("no", "TOR", if (pmpGranularity <= 4) "NA4" else "", "NAPOT").zipWithIndex; if name.nonEmpty - ) - property - .cover(pmp.cfg.a === idx.U, s"The 
cfg access is set to ${name} access ", "Cover PMP access mode setting") - - property.cover(pmp.cfg.l === 0x1.U, s"The cfg lock is set to high ", "Cover PMP lock mode setting") - - // Not including Write and no Read permission as the combination is reserved - for ((name, idx) <- Seq("no", "RO", "", "RW", "X", "RX", "", "RWX").zipWithIndex; if name.nonEmpty) - property.cover( - (Cat(pmp.cfg.x, pmp.cfg.w, pmp.cfg.r) === idx.U), - s"The permission is set to ${name} access ", - "Cover PMP access permission setting" - ) - - for ( - (name, idx) <- Seq("", "TOR", if (pmpGranularity <= 4) "NA4" else "", "NAPOT").zipWithIndex; if name.nonEmpty - ) { - property.cover( - !ignore && hit && aligned && pmp.cfg.a === idx.U, - s"The access matches ${name} mode ", - "Cover PMP access" - ) - property.cover( - pmp.cfg.l && hit && aligned && pmp.cfg.a === idx.U, - s"The access matches ${name} mode with lock bit high", - "Cover PMP access with lock bit" - ) - } - + val aligned = PMP.aligned(pmp, io.addr, io.size, lgMaxSize, prevPMP, pmpGranularity) val cur = WireInit(pmp) cur.cfg.r := aligned && (pmp.cfg.r || ignore) cur.cfg.w := aligned && (pmp.cfg.w || ignore) From 0605c8cbc533b623b1fb122d1044268996a00cc5 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:44:13 +0800 Subject: [PATCH 056/140] [rocketv] add elaborator for PMAChecker - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PMPChecker config --nPMPs 8 --paddrBits 32 --lgMaxSize 4 --pmpGranularity 4 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PMPChecker design --parameter ./PMPChecker.json --run-firtool --- elaborator/src/rocketv/PMP.scala | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 elaborator/src/rocketv/PMP.scala diff --git a/elaborator/src/rocketv/PMP.scala b/elaborator/src/rocketv/PMP.scala new file mode 100644 index 000000000..bb8e69c10 --- /dev/null +++ 
b/elaborator/src/rocketv/PMP.scala @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{PMPChecker, PMPCheckerParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PMPChecker extends Elaborator { + @main + case class PMPCheckerParameterMain( + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "lgMaxSize") lgMaxSize: Int, + @arg(name = "pmpGranularity") pmpGranularity: Int) { + def convert: PMPCheckerParameter = PMPCheckerParameter( + nPMPs: Int, + paddrBits: Int, + lgMaxSize: Int, + pmpGranularity: Int + ) + } + + implicit def PMPCheckerParameterMainParser: ParserForClass[PMPCheckerParameterMain] = + ParserForClass[PMPCheckerParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PMPCheckerParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PMPChecker, PMPCheckerParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 5d02bca4f05f489b0330afcaf197464f661ededa Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 06:46:00 +0800 Subject: [PATCH 057/140] [rocketv] copy PTW into rocketv project --- rocketv/src/PTW.scala | 964 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 964 insertions(+) create mode 100644 rocketv/src/PTW.scala diff --git a/rocketv/src/PTW.scala b/rocketv/src/PTW.scala new file mode 100644 index 000000000..87ef93057 --- /dev/null +++ b/rocketv/src/PTW.scala @@ -0,0 +1,964 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{ + is, + isPow2, + log2Ceil, + switch, + Arbiter, + Cat, + Decoupled, + Enum, + Mux1H, + OHToUInt, + PopCount, + PriorityEncoder, + PriorityEncoderOH, + RegEnable, + UIntToOH, + Valid +} +import chisel3.withClock +import chisel3.experimental.SourceInfo +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.subsystem.CacheBlockBytes +import freechips.rocketchip.tile._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property + +import scala.collection.mutable.ListBuffer + +/** PTE request from TLB to PTW + * + * TLB send a PTE request to PTW when L1TLB miss + */ +class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { + val addr = UInt(vpnBits.W) + val need_gpa = Bool() + val vstage1 = Bool() + val stage2 = Bool() +} + +/** PTE info from L2TLB to TLB + * + * containing: target PTE, exceptions, two-satge tanslation info + */ +class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { + + /** ptw access exception */ + val ae_ptw = Bool() + + /** final access exception */ + val ae_final = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor execute */ + val hx = Bool() + + /** PTE to refill L1TLB + * + * source: L2TLB + */ + val pte = new PTE + + /** pte pglevel */ + val level = UInt(log2Ceil(pgLevels).W) + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() + + /** homogeneous for both pma and pmp */ + val homogeneous = Bool() + val gpa = Valid(UInt(vaddrBits.W)) + val gpa_is_pte = Bool() +} + +/** IO between TLB and PTW + * + * PTW receives : + * - PTE request + * - CSRs info + * - pmp results from PMP(in TLB) + */ +class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { + val req = 
Decoupled(Valid(new PTWReq)) + val resp = Flipped(Valid(new PTWResp)) + val ptbr = Input(new PTBR()) + val hgatp = Input(new PTBR()) + val vsatp = Input(new PTBR()) + val status = Input(new MStatus()) + val hstatus = Input(new HStatus()) + val gstatus = Input(new MStatus()) + val pmp = Input(Vec(nPMPs, new PMP)) + val customCSRs = Flipped(coreParams.customCSRs) +} + +/** PTW performance statistics */ +class PTWPerfEvents extends Bundle { + val l2miss = Bool() + val l2hit = Bool() + val pte_miss = Bool() + val pte_hit = Bool() +} + +/** Datapath IO between PTW and Core + * + * PTW receives CSRs info, pmp checks, sfence instruction info + * + * PTW sends its performance statistics to core + */ +class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { + val ptbr = Input(new PTBR()) + val hgatp = Input(new PTBR()) + val vsatp = Input(new PTBR()) + val sfence = Flipped(Valid(new SFenceReq)) + val status = Input(new MStatus()) + val hstatus = Input(new HStatus()) + val gstatus = Input(new MStatus()) + val pmp = Input(Vec(nPMPs, new PMP)) + val perf = Output(new PTWPerfEvents()) + val customCSRs = Flipped(coreParams.customCSRs) + + /** enable clock generated by ptw */ + val clock_enabled = Output(Bool()) +} + +/** PTE template for transmission + * + * contains useful methods to check PTE attributes + * @see RV-priv spec 4.3.1 for pgae table entry format + */ +class PTE(implicit p: Parameters) extends CoreBundle()(p) { + val reserved_for_future = UInt(10.W) + val ppn = UInt(44.W) + val reserved_for_software = Bits(2.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** global mapping */ + val g = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() + + /** valid bit */ + val v = Bool() + + /** return true if find a pointer to next 
level page table */ + def table(dummy: Int = 0) = v && !r && !w && !x && !d && !a && !u && reserved_for_future === 0.U + + /** return true if find a leaf PTE */ + def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a + + /** user read */ + def ur(dummy: Int = 0) = sr() && u + + /** user write */ + def uw(dummy: Int = 0) = sw() && u + + /** user execute */ + def ux(dummy: Int = 0) = sx() && u + + /** supervisor read */ + def sr(dummy: Int = 0) = leaf() && r + + /** supervisor write */ + def sw(dummy: Int = 0) = leaf() && w && d + + /** supervisor execute */ + def sx(dummy: Int = 0) = leaf() && x + + /** full permission: writable and executable in user mode */ + def isFullPerm(dummy: Int = 0) = uw() && ux() +} + +/** L2TLB PTE template + * + * contains tag bits + * @param nSets number of sets in L2TLB + * @see RV-priv spec 4.3.1 for page table entry format + */ +class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { + val idxBits = log2Ceil(nSets) + val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) + val tag = UInt(tagBits.W) + val ppn = UInt(ppnBits.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() + +} + +/** PTW contains L2TLB, and performs page table walk for high level TLB, and cache queries from L1 TLBs(I$, D$, RoCC) + * + * It performs hierarchy page table query to mem for the desired leaf PTE and cache them in l2tlb. + * Besides leaf PTEs, it also caches non-leaf PTEs in pte_cache to accerlerate the process. 
+ * + * ==Structure== + * - l2tlb : for leaf PTEs + * - set-associative (configurable with [[CoreParams.nL2TLBEntries]]and [[CoreParams.nL2TLBWays]])) + * - PLRU + * - pte_cache: for non-leaf PTEs + * - set-associative + * - LRU + * - s2_pte_cache: for non-leaf PTEs in 2-stage translation + * - set-associative + * - PLRU + * + * l2tlb Pipeline: 3 stage + * {{{ + * stage 0 : read + * stage 1 : decode + * stage 2 : hit check + * }}} + * ==State Machine== + * s_ready: ready to reveive request from TLB + * s_req: request mem; pte_cache hit judge + * s_wait1: deal with l2tlb error + * s_wait2: final hit judge + * s_wait3: receive mem response + * s_fragment_superpage: for superpage PTE + * + * @note l2tlb hit happens in s_req or s_wait1 + * @see RV-priv spec 4.3-4.6 for Virtual-Memory System + * @see RV-priv spec 8.5 for Two-Stage Address Translation + * @todo details in two-stage translation + */ +class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) { + val io = IO(new Bundle { + + /** to n TLB */ + val requestor = Flipped(Vec(n, new TLBPTWIO)) + + /** to HellaCache */ + val mem = new HellaCacheIO + + /** to Core + * + * contains CSRs info and performance statistics + */ + val dpath = new DatapathPTWIO + }) + + val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8) + val state = RegInit(s_ready) + val l2_refill_wire = Wire(Bool()) + + /** Arbiter to arbite request from n TLB */ + val arb = Module(new Arbiter(Valid(new PTWReq), n)) + // use TLB req as arbitor's input + arb.io.in <> io.requestor.map(_.req) + // receive req only when s_ready and not in refill + arb.io.out.ready := (state === s_ready) && !l2_refill_wire + + val resp_valid = RegNext(VecInit(Seq.fill(io.requestor.size)(false.B))) + + val clock_en = + state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate + io.dpath.clock_enabled := usingVM.B && 
clock_en + val gated_clock = + if (!usingVM || !tileParams.dcache.get.clockGate) clock + else ClockGate(clock, clock_en, "ptw_clock_gate") + withClock(gated_clock) { // entering gated-clock domain + + val invalidated = Reg(Bool()) + + /** current PTE level + * {{{ + * 0 <= count <= pgLevel-1 + * count = pgLevel - 1 : leaf PTE + * count < pgLevel - 1 : non-leaf PTE + * }}} + */ + val count = Reg(UInt(log2Ceil(pgLevels).W)) + val resp_ae_ptw = Reg(Bool()) + val resp_ae_final = Reg(Bool()) + val resp_pf = Reg(Bool()) + val resp_gf = Reg(Bool()) + val resp_hr = Reg(Bool()) + val resp_hw = Reg(Bool()) + val resp_hx = Reg(Bool()) + val resp_fragmented_superpage = Reg(Bool()) + + /** tlb request */ + val r_req = Reg(new PTWReq) + + /** current selected way in arbitor */ + val r_req_dest = Reg(Bits()) + // to respond to L1TLB : l2_hit + // to construct mem.req.addr + val r_pte = Reg(new PTE) + val r_hgatp = Reg(new PTBR) + // 2-stage pageLevel + val aux_count = Reg(UInt(log2Ceil(pgLevels).W)) + + /** pte for 2-stage translation */ + val aux_pte = Reg(new PTE) + val aux_ppn_hi = (pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth) + .option(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W))) + val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case + val stage2 = Reg(Bool()) + val stage2_final = Reg(Bool()) + + val satp = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp, io.dpath.ptbr) + val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - r_hgatp.additionalPgLevels + + /** 2-stage translation both enable */ + val do_both_stages = r_req.vstage1 && r_req.stage2 + val max_count = count.max(aux_count) + val vpn = Mux(r_req.vstage1 && stage2, aux_pte.ppn, r_req.addr) + + val mem_resp_valid = RegNext(io.mem.resp.valid) + val mem_resp_data = RegNext(io.mem.resp.bits.data) + io.mem.uncached_resp.map { resp => + assert(!(resp.valid && io.mem.resp.valid)) + resp.ready := true.B + when(resp.valid) { + mem_resp_valid := true.B + mem_resp_data := 
resp.bits.data + } + } + // construct pte from mem.resp + val (pte, invalid_paddr) = { + val tmp = mem_resp_data.asTypeOf(new PTE()) + val res = WireDefault(tmp) + res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth) - 1, 0), tmp.ppn(ppnBits - 1, 0)) + when(tmp.r || tmp.w || tmp.x) { + // for superpage mappings, make sure PPN LSBs are zero + for (i <- 0 until pgLevels - 1) + when( + count <= i.U && tmp.ppn((pgLevels - 1 - i) * pgLevelBits - 1, (pgLevels - 2 - i) * pgLevelBits) =/= 0.U + ) { res.v := false.B } + } + (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, (tmp.ppn >> ppnBits) =/= 0.U)) + } + // find non-leaf PTE, need traverse + val traverse = pte.table() && !invalid_paddr && count < (pgLevels - 1).U + + /** address send to mem for enquerry */ + val pte_addr = + if (!usingVM) 0.U + else { + val vpn_idxs = (0 until pgLevels).map { i => + val width = pgLevelBits + (if (i <= pgLevels - minPgLevels) hypervisorExtraAddrBits else 0) + (vpn >> (pgLevels - i - 1) * pgLevelBits)(width - 1, 0) + } + val mask = Mux( + stage2 && count === r_hgatp_initial_count, + ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U, + ((1 << pgLevelBits) - 1).U + ) + val vpn_idx = vpn_idxs(count) & mask + val raw_pte_addr = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8) + val size = if (usingHypervisor) vaddrBits else paddrBits + //use r_pte.ppn as page table base address + //use vpn slice as offset + raw_pte_addr.apply(size.min(raw_pte_addr.getWidth) - 1, 0) + } + + /** pte_cache input addr */ + val pte_cache_addr = + if (!usingHypervisor) pte_addr + else { + val vpn_idxs = (0 until pgLevels - 1).map { i => + val ext_aux_pte_ppn = aux_ppn_hi match { + case None => aux_pte.ppn + case Some(hi) => Cat(hi, aux_pte.ppn) + } + (ext_aux_pte_ppn >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0) + } + val vpn_idx = vpn_idxs(count) + val raw_pte_cache_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen / 8) + 
raw_pte_cache_addr(vaddrBits.min(raw_pte_cache_addr.getWidth) - 1, 0) + } + + /** stage2_pte_cache input addr */ + val stage2_pte_cache_addr = + if (!usingHypervisor) 0.U + else { + val vpn_idxs = (0 until pgLevels - 1).map { i => + (r_req.addr >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0) + } + val vpn_idx = vpn_idxs(aux_count) + val raw_s2_pte_cache_addr = Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8) + raw_s2_pte_cache_addr(vaddrBits.min(raw_s2_pte_cache_addr.getWidth) - 1, 0) + } + + def makeFragmentedSuperpagePPN(ppn: UInt): Seq[UInt] = { + (pgLevels - 1 until 0 by -1).map(i => + Cat(ppn >> (pgLevelBits * i), r_req.addr(((pgLevelBits * i).min(vpnBits)) - 1, 0).padTo(pgLevelBits * i)) + ) + } + + /** PTECache caches non-leaf PTE + * @param s2 true: 2-stage address translation + */ + def makePTECache(s2: Boolean): (Bool, UInt) = if (coreParams.nPTECacheEntries == 0) { + (false.B, 0.U) + } else { + val plru = new PseudoLRU(coreParams.nPTECacheEntries) + val valid = RegInit(0.U(coreParams.nPTECacheEntries.W)) + val tags = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W))) + // not include full pte, only ppn + val data = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W))) + val can_hit = + if (s2) + count === r_hgatp_initial_count && aux_count < (pgLevels - 1).U && r_req.vstage1 && stage2 && !stage2_final + else count < (pgLevels - 1).U && Mux(r_req.vstage1, stage2, !r_req.stage2) + val can_refill = + if (s2) do_both_stages && !stage2 && !stage2_final + else can_hit + val tag = + if (s2) Cat(true.B, stage2_pte_cache_addr.padTo(vaddrBits)) + else Cat(r_req.vstage1, pte_cache_addr.padTo(if (usingHypervisor) vaddrBits else paddrBits)) + + val hits = tags.map(_ === tag).asUInt & valid + val hit = hits.orR && can_hit + // refill with mem response + when(mem_resp_valid && traverse && can_refill && !hits.orR && !invalidated) { + val r = Mux(valid.andR, plru.way, 
PriorityEncoder(~valid)) + valid := valid | UIntToOH(r) + tags(r) := tag + data(r) := pte.ppn + plru.access(r) + } + // replace + when(hit && state === s_req) { plru.access(OHToUInt(hits)) } + when(io.dpath.sfence.valid && (!io.dpath.sfence.bits.rs1 || usingHypervisor.B && io.dpath.sfence.bits.hg)) { + valid := 0.U + } + + val lcount = if (s2) aux_count else count + for (i <- 0 until pgLevels - 1) { + ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i") + } + + (hit, Mux1H(hits, data)) + } + // generate pte_cache + val (pte_cache_hit, pte_cache_data) = makePTECache(false) + // generate pte_cache with 2-stage translation + val (stage2_pte_cache_hit, stage2_pte_cache_data) = makePTECache(true) + // pte_cache hit or 2-stage pte_cache hit + val pte_hit = RegNext(false.B) + io.dpath.perf.pte_miss := false.B + io.dpath.perf.pte_hit := pte_hit && (state === s_req) && !io.dpath.perf.l2hit + assert( + !(io.dpath.perf.l2hit && (io.dpath.perf.pte_miss || io.dpath.perf.pte_hit)), + "PTE Cache Hit/Miss Performance Monitor Events are lower priority than L2TLB Hit event" + ) + // l2_refill happens when find the leaf pte + val l2_refill = RegNext(false.B) + l2_refill_wire := l2_refill + io.dpath.perf.l2miss := false.B + io.dpath.perf.l2hit := false.B + // l2tlb + val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = + if (coreParams.nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None) + else { + val code = new ParityCode + require(isPow2(coreParams.nL2TLBEntries)) + require(isPow2(coreParams.nL2TLBWays)) + require(coreParams.nL2TLBEntries >= coreParams.nL2TLBWays) + val nL2TLBSets = coreParams.nL2TLBEntries / coreParams.nL2TLBWays + require(isPow2(nL2TLBSets)) + val idxBits = log2Ceil(nL2TLBSets) + + val l2_plru = new SetAssocLRU(nL2TLBSets, coreParams.nL2TLBWays, "plru") + + val ram = DescribedSRAM( + name = "l2_tlb_ram", + desc = "L2 TLB", + size = nL2TLBSets, + data = Vec(coreParams.nL2TLBWays, UInt(code.width(new 
L2TLBEntry(nL2TLBSets).getWidth).W)) + ) + + val g = Reg(Vec(coreParams.nL2TLBWays, UInt(nL2TLBSets.W))) + val valid = RegInit(VecInit(Seq.fill(coreParams.nL2TLBWays)(0.U(nL2TLBSets.W)))) + // use r_req to construct tag + val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits - pgIdxBits - 1, 0)), idxBits) + + /** the valid vec for the selected set(including n ways) */ + val r_valid_vec = valid.map(_(r_idx)).asUInt + val r_valid_vec_q = Reg(UInt(coreParams.nL2TLBWays.W)) + val r_l2_plru_way = Reg(UInt(log2Ceil(coreParams.nL2TLBWays.max(1)).W)) + r_valid_vec_q := r_valid_vec + // replacement way + r_l2_plru_way := (if (coreParams.nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U) + // refill with r_pte(leaf pte) + when(l2_refill && !invalidated) { + val entry = Wire(new L2TLBEntry(nL2TLBSets)) + entry.ppn := r_pte.ppn + entry.d := r_pte.d + entry.a := r_pte.a + entry.u := r_pte.u + entry.x := r_pte.x + entry.w := r_pte.w + entry.r := r_pte.r + entry.tag := r_tag + // if all the way are valid, use plru to select one way to be replaced, + // otherwise use PriorityEncoderOH to select one + val wmask = + if (coreParams.nL2TLBWays > 1) + Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) + else 1.U(1.W) + ram.write(r_idx, VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools) + + val mask = UIntToOH(r_idx) + for (way <- 0 until coreParams.nL2TLBWays) { + when(wmask(way)) { + valid(way) := valid(way) | mask + g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask) + } + } + } + // sfence happens + when(io.dpath.sfence.valid) { + val hg = usingHypervisor.B && io.dpath.sfence.bits.hg + for (way <- 0 until coreParams.nL2TLBWays) { + valid(way) := + Mux( + !hg && io.dpath.sfence.bits.rs1, + valid(way) & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits + pgIdxBits - 1, pgIdxBits)), + Mux(!hg && io.dpath.sfence.bits.rs2, valid(way) & g(way), 0.U) + ) + } + } + + val s0_valid = !l2_refill && 
arb.io.out.fire + val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa + val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid) + val s2_valid = RegNext(s1_valid) + // read from tlb idx + val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits - 1, 0), s0_valid) + val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid))) + val s2_valid_vec = RegEnable(r_valid_vec, s1_valid) + val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid) + val s2_error = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR + when(s2_valid && s2_error) { valid.foreach { _ := 0.U } } + // decode + val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets))) + val s2_hit_vec = + (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag)) + val s2_hit = s2_valid && s2_hit_vec.orR + io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR) + io.dpath.perf.l2hit := s2_hit + when(s2_hit) { + l2_plru.access(r_idx, OHToUInt(s2_hit_vec)) + assert((PopCount(s2_hit_vec) === 1.U) || s2_error, "L2 TLB multi-hit") + } + + val s2_pte = Wire(new PTE) + val s2_hit_entry = Mux1H(s2_hit_vec, s2_entry_vec) + s2_pte.ppn := s2_hit_entry.ppn + s2_pte.d := s2_hit_entry.d + s2_pte.a := s2_hit_entry.a + s2_pte.g := Mux1H(s2_hit_vec, s2_g_vec) + s2_pte.u := s2_hit_entry.u + s2_pte.x := s2_hit_entry.x + s2_pte.w := s2_hit_entry.w + s2_pte.r := s2_hit_entry.r + s2_pte.v := true.B + s2_pte.reserved_for_future := 0.U + s2_pte.reserved_for_software := 0.U + + for (way <- 0 until coreParams.nL2TLBWays) { + ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way") + } + + (s2_hit, s2_error, s2_pte, Some(ram)) + } + + // if SFENCE occurs during walk, don't refill PTE cache or L2 TLB until next walk + invalidated := io.dpath.sfence.valid || (invalidated && state =/= s_ready) + // mem request + 
io.mem.keep_clock_enabled := false.B + + io.mem.req.valid := state === s_req || state === s_dummy1 + io.mem.req.bits.phys := true.B + io.mem.req.bits.cmd := M_XRD + io.mem.req.bits.size := log2Ceil(xLen / 8).U + io.mem.req.bits.signed := false.B + io.mem.req.bits.addr := pte_addr + io.mem.req.bits.idx.foreach(_ := pte_addr) + io.mem.req.bits.dprv := PRV.S.U // PTW accesses are S-mode by definition + io.mem.req.bits.dv := do_both_stages && !stage2 + io.mem.req.bits.tag := DontCare + io.mem.req.bits.no_alloc := DontCare + io.mem.req.bits.no_xcpt := DontCare + io.mem.req.bits.data := DontCare + io.mem.req.bits.mask := DontCare + + io.mem.s1_kill := l2_hit || state =/= s_wait1 + io.mem.s1_data := DontCare + io.mem.s2_kill := false.B + + val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits) + require(!usingHypervisor || pageGranularityPMPs, s"hypervisor requires pmpGranularity >= ${1 << pgIdxBits}") + + val pmaPgLevelHomogeneous = (0 until pgLevels).map { i => + val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)) + if (pageGranularityPMPs && i == pgLevels - 1) { + require( + TLBPageLookup.homogeneous(edge.manager.managers, pgSize), + s"All memory regions must be $pgSize-byte aligned" + ) + true.B + } else { + TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous + } + } + val pmaHomogeneous = pmaPgLevelHomogeneous(count) + val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(r_pte.ppn << pgIdxBits, count) + val homogeneous = pmaHomogeneous && pmpHomogeneous + // response to tlb + for (i <- 0 until io.requestor.size) { + io.requestor(i).resp.valid := resp_valid(i) + io.requestor(i).resp.bits.ae_ptw := resp_ae_ptw + io.requestor(i).resp.bits.ae_final := resp_ae_final + io.requestor(i).resp.bits.pf := resp_pf + io.requestor(i).resp.bits.gf := resp_gf + io.requestor(i).resp.bits.hr := resp_hr + io.requestor(i).resp.bits.hw := resp_hw + io.requestor(i).resp.bits.hx := resp_hx + 
io.requestor(i).resp.bits.pte := r_pte + io.requestor(i).resp.bits.level := max_count + io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs.B + io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs.B + io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa + io.requestor(i).resp.bits.gpa.bits := + Cat( + Mux( + !stage2_final || !r_req.vstage1 || aux_count === (pgLevels - 1).U, + aux_pte.ppn, + makeFragmentedSuperpagePPN(aux_pte.ppn)(aux_count) + ), + gpa_pgoff + ) + io.requestor(i).resp.bits.gpa_is_pte := !stage2_final + io.requestor(i).ptbr := io.dpath.ptbr + io.requestor(i).hgatp := io.dpath.hgatp + io.requestor(i).vsatp := io.dpath.vsatp + io.requestor(i).customCSRs <> io.dpath.customCSRs + io.requestor(i).status := io.dpath.status + io.requestor(i).hstatus := io.dpath.hstatus + io.requestor(i).gstatus := io.dpath.gstatus + io.requestor(i).pmp := io.dpath.pmp + } + + // control state machine + val next_state = WireDefault(state) + state := OptimizationBarrier(next_state) + val do_switch = WireDefault(false.B) + + switch(state) { + is(s_ready) { + when(arb.io.out.fire) { + val satp_initial_count = pgLevels.U - minPgLevels.U - satp.additionalPgLevels + val vsatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.vsatp.additionalPgLevels + val hgatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.hgatp.additionalPgLevels + val aux_ppn = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp.ppn, arb.io.out.bits.bits.addr) + + r_req := arb.io.out.bits.bits + r_req_dest := arb.io.chosen + next_state := Mux(arb.io.out.bits.valid, s_req, s_ready) + stage2 := arb.io.out.bits.bits.stage2 + stage2_final := arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.vstage1 + count := Mux(arb.io.out.bits.bits.stage2, hgatp_initial_count, satp_initial_count) + aux_count := Mux(arb.io.out.bits.bits.vstage1, vsatp_initial_count, 0.U) + aux_pte.ppn := aux_ppn + aux_ppn_hi.foreach { _ := aux_ppn >> 
aux_pte.ppn.getWidth } + aux_pte.reserved_for_future := 0.U + resp_ae_ptw := false.B + resp_ae_final := false.B + resp_pf := false.B + resp_gf := false.B + resp_hr := true.B + resp_hw := true.B + resp_hx := true.B + resp_fragmented_superpage := false.B + r_hgatp := io.dpath.hgatp + + assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2) + } + } + is(s_req) { + when(stage2 && count === r_hgatp_initial_count) { + gpa_pgoff := Mux(aux_count === (pgLevels - 1).U, r_req.addr << (xLen / 8).log2, stage2_pte_cache_addr) + } + // pte_cache hit + when(stage2_pte_cache_hit) { + aux_count := aux_count + 1.U + aux_pte.ppn := stage2_pte_cache_data + aux_ppn_hi.foreach { _ := 0.U } + aux_pte.reserved_for_future := 0.U + pte_hit := true.B + }.elsewhen(pte_cache_hit) { + count := count + 1.U + pte_hit := true.B + }.otherwise { + next_state := Mux(io.mem.req.ready, s_wait1, s_req) + } + } + is(s_wait1) { + // This Mux is for the l2_error case; the l2_hit && !l2_error case is overriden below + next_state := Mux(l2_hit, s_req, s_wait2) + } + is(s_wait2) { + next_state := s_wait3 + io.dpath.perf.pte_miss := count < (pgLevels - 1).U + when(io.mem.s2_xcpt.ae.ld) { + resp_ae_ptw := true.B + next_state := s_ready + resp_valid(r_req_dest) := true.B + } + } + is(s_fragment_superpage) { + next_state := s_ready + resp_valid(r_req_dest) := true.B + when(!homogeneous) { + count := (pgLevels - 1).U + resp_fragmented_superpage := true.B + } + when(do_both_stages) { + resp_fragmented_superpage := true.B + } + } + } + + val merged_pte = { + val superpage_masks = (0 until pgLevels).map(i => + ((BigInt(1) << pte.ppn.getWidth) - (BigInt(1) << (pgLevels - 1 - i) * pgLevelBits)).U + ) + val superpage_mask = superpage_masks(Mux(stage2_final, max_count, (pgLevels - 1).U)) + val stage1_ppns = (0 until pgLevels - 1).map(i => + Cat( + pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits), + aux_pte.ppn((pgLevels - i - 1) * pgLevelBits - 1, 0) + ) + ) :+ pte.ppn + val stage1_ppn = 
stage1_ppns(count) + makePTE(stage1_ppn & superpage_mask, aux_pte) + } + + r_pte := OptimizationBarrier( + // l2tlb hit->find a leaf PTE(l2_pte), respond to L1TLB + Mux( + l2_hit && !l2_error, + l2_pte, + // S2 PTE cache hit -> proceed to the next level of walking, update the r_pte with hgatp + Mux( + state === s_req && stage2_pte_cache_hit, + makeHypervisorRootPTE(r_hgatp, stage2_pte_cache_data, l2_pte), + // pte cache hit->find a non-leaf PTE(pte_cache),continue to request mem + Mux( + state === s_req && pte_cache_hit, + makePTE(pte_cache_data, l2_pte), + // 2-stage translation + Mux( + do_switch, + makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte), + // when mem respond, store mem.resp.pte + Mux( + mem_resp_valid, + Mux(!traverse && r_req.vstage1 && stage2, merged_pte, pte), + // fragment_superpage + Mux( + state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U, + makePTE(makeFragmentedSuperpagePPN(r_pte.ppn)(count), r_pte), + // when tlb request come->request mem, use root address in satp(or vsatp,hgatp) + Mux( + arb.io.out.fire, + Mux( + arb.io.out.bits.bits.stage2, + makeHypervisorRootPTE(io.dpath.hgatp, io.dpath.vsatp.ppn, r_pte), + makePTE(satp.ppn, r_pte) + ), + r_pte + ) + ) + ) + ) + ) + ) + ) + ) + + when(l2_hit && !l2_error) { + assert(state === s_req || state === s_wait1) + next_state := s_ready + resp_valid(r_req_dest) := true.B + count := (pgLevels - 1).U + } + when(mem_resp_valid) { + assert(state === s_wait3) + next_state := s_req + when(traverse) { + when(do_both_stages && !stage2) { do_switch := true.B } + count := count + 1.U + }.otherwise { + val gf = stage2 && !stage2_final && !pte.ur() + val ae = pte.v && invalid_paddr + val pf = pte.v && pte.reserved_for_future =/= 0.U + val success = pte.v && !ae && !pf && !gf + + when(do_both_stages && !stage2_final && success) { + when(stage2) { + stage2 := false.B + count := aux_count + }.otherwise { + stage2_final := true.B + do_switch := true.B + } + }.otherwise { + // find a leaf 
pte, start l2 refill + l2_refill := success && count === (pgLevels - 1).U && !r_req.need_gpa && + (!r_req.vstage1 && !r_req.stage2 || + do_both_stages && aux_count === (pgLevels - 1).U && pte.isFullPerm()) + count := max_count + + when( + pageGranularityPMPs.B && !(count === (pgLevels - 1).U && (!do_both_stages || aux_count === (pgLevels - 1).U)) + ) { + next_state := s_fragment_superpage + }.otherwise { + next_state := s_ready + resp_valid(r_req_dest) := true.B + } + + resp_ae_ptw := ae && count < (pgLevels - 1).U && pte.table() + resp_ae_final := ae + resp_pf := pf && !stage2 + resp_gf := gf || (pf && stage2) + resp_hr := !stage2 || (!pf && !gf && pte.ur()) + resp_hw := !stage2 || (!pf && !gf && pte.uw()) + resp_hx := !stage2 || (!pf && !gf && pte.ux()) + } + } + } + when(io.mem.s2_nack) { + assert(state === s_wait2) + next_state := s_req + } + + when(do_switch) { + aux_count := Mux(traverse, count + 1.U, count) + count := r_hgatp_initial_count + aux_pte := Mux( + traverse, + pte, { + val s1_ppns = (0 until pgLevels - 1).map(i => + Cat( + pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits), + r_req + .addr((((pgLevels - i - 1) * pgLevelBits).min(vpnBits)) - 1, 0) + .padTo((pgLevels - i - 1) * pgLevelBits) + ) + ) :+ pte.ppn + makePTE(s1_ppns(count), pte) + } + ) + aux_ppn_hi.foreach { _ := 0.U } + stage2 := true.B + } + + for (i <- 0 until pgLevels) { + val leaf = mem_resp_valid && !traverse && count === i.U + ccover( + leaf && pte.v && !invalid_paddr && pte.reserved_for_future === 0.U, + s"L$i", + s"successful page-table access, level $i" + ) + ccover(leaf && pte.v && invalid_paddr, s"L${i}_BAD_PPN_MSB", s"PPN too large, level $i") + ccover(leaf && pte.v && pte.reserved_for_future =/= 0.U, s"L${i}_BAD_RSV_MSB", s"reserved MSBs set, level $i") + ccover(leaf && !mem_resp_data(0), s"L${i}_INVALID_PTE", s"page not present, level $i") + if (i != pgLevels - 1) + ccover(leaf && !pte.v && mem_resp_data(0), s"L${i}_BAD_PPN_LSB", s"PPN LSBs not zero, level 
$i") + } + ccover(mem_resp_valid && count === (pgLevels - 1).U && pte.table(), s"TOO_DEEP", s"page table too deep") + ccover(io.mem.s2_nack, "NACK", "D$ nacked page-table access") + ccover(state === s_wait2 && io.mem.s2_xcpt.ae.ld, "AE", "access exception while walking page table") + + } // leaving gated-clock domain + + private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc) + + /** Relace PTE.ppn with ppn */ + private def makePTE(ppn: UInt, default: PTE) = { + val pte = WireDefault(default) + pte.ppn := ppn + pte + } + + /** use hgatp and vpn to construct a new ppn */ + private def makeHypervisorRootPTE(hgatp: PTBR, vpn: UInt, default: PTE) = { + val count = pgLevels.U - minPgLevels.U - hgatp.additionalPgLevels + val idxs = (0 to pgLevels - minPgLevels).map(i => (vpn >> (pgLevels - i) * pgLevelBits)) + val lsbs = WireDefault(UInt(maxHypervisorExtraAddrBits.W), idxs(count)) + val pte = WireDefault(default) + pte.ppn := Cat(hgatp.ppn >> maxHypervisorExtraAddrBits, lsbs) + pte + } +} + +/** Mix-ins for constructing tiles that might have a PTW */ +trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile => + val module: CanHavePTWModule + var nPTWPorts = 1 + nDCachePorts += usingPTW.toInt +} + +trait CanHavePTWModule extends HasHellaCacheModule { + val outer: CanHavePTW + val ptwPorts = ListBuffer(outer.dcache.module.io.ptw) + val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p)) + ptw.io.mem <> DontCare + if (outer.usingPTW) { + dcachePorts += ptw.io.mem + } +} From 5dc4396ab063d670e876b09faecfafd669b36eed Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Fri, 28 Jun 2024 16:32:45 +0800 Subject: [PATCH 058/140] [rocketv] copy ECC into rocketv project --- rocketv/src/ECC.scala | 235 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 rocketv/src/ECC.scala diff 
--git a/rocketv/src/ECC.scala b/rocketv/src/ECC.scala new file mode 100644 index 000000000..0c7c5c920 --- /dev/null +++ b/rocketv/src/ECC.scala @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util._ +import chisel3.util.random.LFSR + +abstract class Decoding +{ + def uncorrected: UInt + def corrected: UInt + def correctable: Bool + def uncorrectable: Bool // If true, correctable should be ignored + def error = correctable || uncorrectable +} + +abstract class Code +{ + def canDetect: Boolean + def canCorrect: Boolean + + def width(w0: Int): Int + + /** Takes the unencoded width and returns a list of indices indicating which + * bits of the encoded value will be used for ecc + */ + def eccIndices(width: Int): Seq[Int] + + /** Encode x to a codeword suitable for decode. + * If poison is true, the decoded value will report uncorrectable + * error despite uncorrected == corrected == x. + */ + def encode(x: UInt, poison: Bool = false.B): UInt + def decode(x: UInt): Decoding + + /** Copy the bits in x to the right bit positions in an encoded word, + * so that x === decode(swizzle(x)).uncorrected; but don't generate + * the other code bits, so decode(swizzle(x)).error might be true. + * For codes for which this operation is not trivial, throw an + * UnsupportedOperationException. 
*/ + def swizzle(x: UInt): UInt +} + +class IdentityCode extends Code +{ + def canDetect = false + def canCorrect = false + + def width(w0: Int) = w0 + def eccIndices(width: Int) = Seq.empty[Int] + def encode(x: UInt, poison: Bool = false.B) = { + require (poison.isLit && poison.litValue == 0, "IdentityCode can not be poisoned") + x + } + def swizzle(x: UInt) = x + def decode(y: UInt) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = false.B + def uncorrectable = false.B + } +} + +class ParityCode extends Code +{ + def canDetect = true + def canCorrect = false + + def width(w0: Int) = w0+1 + def eccIndices(w0: Int) = Seq(w0) + def encode(x: UInt, poison: Bool = false.B) = Cat(x.xorR ^ poison, x) + def swizzle(x: UInt) = Cat(false.B, x) + def decode(y: UInt) = new Decoding { + val uncorrected = y(y.getWidth-2,0) + val corrected = uncorrected + val correctable = false.B + val uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def canDetect = true + def canCorrect = true + + // SEC codes may or may not be poisonous depending on the length + // If the code is perfect, every non-codeword is correctable + def poisonous(n: Int) = !isPow2(n+1) + + def width(k: Int) = { + val m = log2Floor(k) + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + + def swizzle(x: UInt) = { + val k = x.getWidth + val n = width(k) + Cat(0.U((n-k).W), x) + } + + // An (n=16, k=11) Hamming code is naturally encoded as: + // PPxPxxxPxxxxxxxP where P are parity bits and x are data + // Indexes typically start at 1, because then the P are on powers of two + // In systematic coding, you put all the data in the front: + // xxxxxxxxxxxPPPPP + // Indexes typically start at 0, because Computer Science + // For sanity when reading SRAMs, you want systematic form. 
+ + private def impl(n: Int, k: Int) = { + require (n >= 3 && k >= 1 && !isPow2(n)) + val hamm2sys = IndexedSeq.tabulate(n+1) { i => + if (i == 0) { + n /* undefined */ + } else if (isPow2(i)) { + k + log2Ceil(i) + } else { + i - 1 - log2Ceil(i) + } + } + val sys2hamm = hamm2sys.zipWithIndex.sortBy(_._1).map(_._2).toIndexedSeq + def syndrome(j: Int) = { + val bit = 1 << j + ("b" + Seq.tabulate(n) { i => + if ((sys2hamm(i) & bit) != 0) "1" else "0" + }.reverse.mkString).U + } + (hamm2sys, sys2hamm, syndrome _) + } + + def encode(x: UInt, poison: Bool = false.B) = { + val k = x.getWidth + val n = width(k) + val (_, _, syndrome) = impl(n, k) + + require ((poison.isLit && poison.litValue == 0) || poisonous(n), s"SEC code of length ${n} cannot be poisoned") + + /* By setting the entire syndrome on poison, the corrected bit falls off the end of the code */ + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j)(k-1, 0) & x).xorR ^ poison }.asUInt + Cat(syndromeUInt, x) + } + + def decode(y: UInt) = new Decoding { + val n = y.getWidth + val k = n - log2Ceil(n) + val (_, sys2hamm, syndrome) = impl(n, k) + + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j) & y).xorR }.asUInt + + val hammBadBitOH = UIntToOH(syndromeUInt, n+1) + val sysBadBitOH = VecInit.tabulate(k) { i => hammBadBitOH(sys2hamm(i)) }.asUInt + + val uncorrected = y(k-1, 0) + val corrected = uncorrected ^ sysBadBitOH + val correctable = syndromeUInt.orR + val uncorrectable = if (poisonous(n)) { syndromeUInt > n.U } else { false.B } + } +} + +class SECDEDCode extends Code +{ + def canDetect = true + def canCorrect = true + + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + def encode(x: UInt, poison: Bool = false.B) = { + // toggling two bits ensures the error is uncorrectable + // to ensure corrected == uncorrected, we pick one redundant + // 
bit from SEC (the highest); correcting it does not affect + // corrected == uncorrected. the second toggled bit is the + // parity bit, which also does not appear in the decoding + val toggle_lo = Cat(poison.asUInt, poison.asUInt) + val toggle_hi = toggle_lo << (sec.width(x.getWidth)-1) + par.encode(sec.encode(x)) ^ toggle_hi + } + def swizzle(x: UInt) = par.swizzle(sec.swizzle(x)) + def decode(x: UInt) = new Decoding { + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + val uncorrected = secdec.uncorrected + val corrected = secdec.corrected + val correctable = pardec.uncorrectable + val uncorrectable = !pardec.uncorrectable && secdec.correctable + } +} + +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): UInt = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UIntToOH(LFSR(16)(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f) +} + +trait CanHaveErrors extends Bundle { + val correctable: Option[ValidIO[UInt]] + val uncorrectable: Option[ValidIO[UInt]] +} + +case class ECCParams( + bytes: Int = 1, + code: Code = new IdentityCode, + notifyErrors: Boolean = false, + ) + +object Code { + def fromString(s: Option[String]): Code = fromString(s.getOrElse("none")) + def fromString(s: String): Code = s.toLowerCase match { + case "none" => new IdentityCode + case "identity" => new IdentityCode + case "parity" => new ParityCode + case "sec" => new SECCode + case "secded" => new SECDEDCode + case _ => throw new IllegalArgumentException("Unknown ECC type") + } +} \ No newline at end of file From e02d69f340b49bc49baf1ff65a0f41f11b475b97 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 17:46:21 +0800 Subject: [PATCH 059/140] [rocketv] migrate PTW --- rocketv/src/Bundle.scala | 368 ++++++++++++++++++++- rocketv/src/PTW.scala | 688 ++++++++++++++++++--------------------- 2 files changed, 685 insertions(+), 371 
deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 46b4ea418..d704771e6 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -5,7 +5,7 @@ package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{Cat, log2Ceil} +import chisel3.util.{Cat, Decoupled, Valid, isPow2, log2Ceil} // This file defines Bundle shared in the project. // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. @@ -592,4 +592,368 @@ class PMACheckerResponse extends Bundle { val aa = Bool() val x = Bool() val eff = Bool() -} \ No newline at end of file +} + + +/** IO between TLB and PTW + * + * PTW receives : + * - PTE request + * - CSRs info + * - pmp results from PMP(in TLB) + */ +class TLBPTWIO(nPMPs: Int, vpnBits: Int, paddrBits: Int, vaddrBits: Int, pgLevels: Int, xLen: Int, maxPAddrBits: Int, pgIdxBits: Int) extends Bundle { + val req = Decoupled(Valid(new PTWReq(vpnBits))) + val resp = Flipped(Valid(new PTWResp(vaddrBits, pgLevels))) + val ptbr = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val status = Input(new MStatus) + val hstatus = Input(new HStatus) + val gstatus = Input(new MStatus) + val pmp = Input(Vec(nPMPs, new PMP(paddrBits))) + // No customCSR for the first time refactor. 
+ // val customCSRs = Flipped(coreParams.customCSRs) +} + +class PTWReq(vpnBits: Int) extends Bundle { + val addr = UInt(vpnBits.W) + val need_gpa = Bool() + val vstage1 = Bool() + val stage2 = Bool() +} + +/** PTE info from L2TLB to TLB + * + * containing: target PTE, exceptions, two-satge tanslation info + */ +class PTWResp(vaddrBits: Int, pgLevels: Int) extends Bundle { + + /** ptw access exception */ + val ae_ptw = Bool() + + /** final access exception */ + val ae_final = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor execute */ + val hx = Bool() + + /** PTE to refill L1TLB + * + * source: L2TLB + */ + val pte = new PTE + + /** pte pglevel */ + val level = UInt(log2Ceil(pgLevels).W) + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() + + /** homogeneous for both pma and pmp */ + val homogeneous = Bool() + val gpa = Valid(UInt(vaddrBits.W)) + val gpa_is_pte = Bool() +} + +object PTE { + /** return true if find a pointer to next level page table */ + def table(pte: PTE) = pte.v && !pte.r && !pte.w && !pte.x && !pte.d && !pte.a && !pte.u && pte.reserved_for_future === 0.U + /** return true if find a leaf PTE */ + def leaf(pte: PTE) = pte.v && (pte.r || (pte.x && !pte.w)) && pte.a + /** user read */ + def ur(pte: PTE) = sr(pte) && pte.u + /** user write*/ + def uw(pte: PTE) = sw(pte) && pte.u + /** user execute */ + def ux(pte: PTE) = sx(pte) && pte.u + /** supervisor read */ + def sr(pte: PTE) = leaf(pte) && pte.r + /** supervisor write */ + def sw(pte: PTE) = leaf(pte) && pte.w && pte.d + /** supervisor execute */ + def sx(pte: PTE) = leaf(pte) && pte.x + /** full permission: writable and executable in user mode */ + def isFullPerm(pte: PTE) = uw(pte) && ux(pte) +} + +/** PTE template for transmission + * + * contains useful methods to check PTE attributes + * @see RV-priv spec 4.3.1 
for pgae table entry format + */ +class PTE extends Bundle { + val reserved_for_future = UInt(10.W) + val ppn = UInt(44.W) + val reserved_for_software = UInt(2.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** global mapping */ + val g = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() + + /** valid bit */ + val v = Bool() +} + + +class HellaCacheIO( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int, + paddrBits: Int, + vaddrBitsExtended: Int, + separateUncachedResp: Boolean) + extends Bundle { + val req = Decoupled( + new HellaCacheReq(coreMaxAddrBits, usingVM, untagBits, pgIdxBits, dcacheReqTagBits, dcacheArbPorts, coreDataBytes) + ) + val s1_kill = Output(Bool()) // kill previous cycle's req + val s1_data = Output(new HellaCacheWriteData(coreDataBytes)) // data for previous cycle's req + val s2_nack = Input(Bool()) // req from two cycles ago is rejected + val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) + val s2_kill = Output(Bool()) // kill req from two cycles ago + val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO + val s2_paddr = Input(UInt(paddrBits.W)) // translated address + + val resp = Flipped( + Valid( + new HellaCacheResp( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ) + ) + ) + val replay_next = Input(Bool()) + val s2_xcpt = Input(new HellaCacheExceptions) + val s2_gpa = Input(UInt(vaddrBitsExtended.W)) + val s2_gpa_is_pte = Input(Bool()) + val uncached_resp = Option.when(separateUncachedResp)( + Flipped( + Decoupled( + new HellaCacheResp( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + 
dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ) + ) + ) + ) + val ordered = Input(Bool()) + val perf = Input(new HellaCachePerfEvents()) + + val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? + val clock_enabled = Input(Bool()) // is D$ currently being clocked? +} + +class HellaCacheReq( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int) + extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + val M_SZ = 5 + + val phys = Bool() + val no_alloc = Bool() + val no_xcpt = Bool() + + val addr = UInt(coreMaxAddrBits.W) + val idx = Option.when(usingVM && untagBits > pgIdxBits)(UInt(coreMaxAddrBits.W)) + val tag = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) + // TODO: handle this uop + val cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(log2Ceil(coreDataBytes) + 1).W) + val signed = Bool() + // TODO: handle this uop + val dprv = UInt(PRV.SZ.W) + val dv = Bool() + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + + +class HellaCacheWriteData(coreDataBytes: Int) extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + + +class HellaCacheResp( + coreMaxAddrBits: Int, + usingVM: Boolean, + untagBits: Int, + pgIdxBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + coreDataBytes: Int) + extends Bundle { + require(isPow2(coreDataBytes)) + val coreDataBits: Int = coreDataBytes * 8 + val M_SZ = 5 + + val replay = Bool() + val has_data = Bool() + val data_word_bypass = UInt(coreDataBits.W) + val data_raw = UInt(coreDataBits.W) + val store_data = UInt(coreDataBits.W) + + val addr = UInt(coreMaxAddrBits.W) + val idx = Option.when(usingVM && untagBits > pgIdxBits)(UInt(coreMaxAddrBits.W)) + val tag = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) + val 
cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(log2Ceil(coreDataBytes) + 1).W) + val signed = Bool() + val dprv = UInt(PRV.SZ.W) + val dv = Bool() + + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions + val gf = new AlignmentExceptions + val ae = new AlignmentExceptions +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCachePerfEvents extends Bundle { + val acquire = Bool() + val release = Bool() + val grant = Bool() + val tlbMiss = Bool() + val blocked = Bool() + val canAcceptStoreThenLoad = Bool() + val canAcceptStoreThenRMW = Bool() + val canAcceptLoadThenLoad = Bool() + val storeBufferEmptyAfterLoad = Bool() + val storeBufferEmptyAfterStore = Bool() +} + +class DatapathPTWIO( + xLen: Int, + maxPAddrBits: Int, + pgIdxBits: Int, + vaddrBits: Int, + asidBits: Int, + nPMPs: Int, + paddrBits: Int) + extends Bundle { + val ptbr = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, maxPAddrBits, pgIdxBits)) + val sfence = Flipped(Valid(new SFenceReq(vaddrBits, asidBits))) + val status = Input(new MStatus()) + val hstatus = Input(new HStatus()) + val gstatus = Input(new MStatus()) + val pmp = Input(Vec(nPMPs, new PMP(paddrBits))) + val perf = Output(new PTWPerfEvents()) + // No customCSR for the first time refactor. 
+ // val customCSRs = Flipped(coreParams.customCSRs) + + /** enable clock generated by ptw */ + val clock_enabled = Output(Bool()) +} + +class SFenceReq(vaddrBits: Int, asidBits: Int) extends Bundle { + val rs1 = Bool() + val rs2 = Bool() + val addr = UInt(vaddrBits.W) + val asid = UInt(asidBits.W) + val hv = Bool() + val hg = Bool() +} + +class PTWPerfEvents extends Bundle { + val l2miss = Bool() + val l2hit = Bool() + val pte_miss = Bool() + val pte_hit = Bool() +} + +/** L2TLB PTE template + * + * contains tag bits + * @param nSets number of sets in L2TLB + * @see RV-priv spec 4.3.1 for page table entry format + */ +class L2TLBEntry(nSets: Int, ppnBits: Int, maxSVAddrBits: Int, pgIdxBits: Int, usingHypervisor: Boolean) extends Bundle { + val idxBits = log2Ceil(nSets) + val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) + val tag = UInt(tagBits.W) + val ppn = UInt(ppnBits.W) + + /** dirty bit */ + val d = Bool() + + /** access bit */ + val a = Bool() + + /** user mode accessible */ + val u = Bool() + + /** whether the page is executable */ + val x = Bool() + + /** whether the page is writable */ + val w = Bool() + + /** whether the page is readable */ + val r = Bool() +} diff --git a/rocketv/src/PTW.scala b/rocketv/src/PTW.scala index 87ef93057..745e4aa66 100644 --- a/rocketv/src/PTW.scala +++ b/rocketv/src/PTW.scala @@ -1,236 +1,133 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{ - is, - isPow2, - log2Ceil, - switch, - Arbiter, - Cat, - Decoupled, - Enum, - Mux1H, - OHToUInt, - PopCount, - PriorityEncoder, - PriorityEncoderOH, - RegEnable, - UIntToOH, - Valid -} -import chisel3.withClock -import chisel3.experimental.SourceInfo -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.subsystem.CacheBlockBytes -import freechips.rocketchip.tile._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property - -import scala.collection.mutable.ListBuffer - -/** PTE request from TLB to PTW - * - * TLB send a PTE request to PTW when L1TLB miss - */ -class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { - val addr = UInt(vpnBits.W) - val need_gpa = Bool() - val vstage1 = Bool() - val stage2 = Bool() -} - -/** PTE info from L2TLB to TLB - * - * containing: target PTE, exceptions, two-satge tanslation info - */ -class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { - - /** ptw access exception */ - val ae_ptw = Bool() - - /** final access exception */ - val ae_final = Bool() - - /** page fault */ - val pf = Bool() - - /** guest page fault */ - val gf = Bool() - - /** hypervisor read */ - val hr = Bool() - - /** hypervisor write */ - val hw = Bool() +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.circt.ClockGate +import chisel3.util.{Arbiter, Cat, Enum, Mux1H, OHToUInt, PopCount, PriorityEncoder, PriorityEncoderOH, RegEnable, SRAM, SRAMInterface, UIntToOH, Valid, is, isPow2, log2Ceil, switch} - /** hypervisor execute */ - val 
hx = Bool() - - /** PTE to refill L1TLB - * - * source: L2TLB - */ - val pte = new PTE - - /** pte pglevel */ - val level = UInt(log2Ceil(pgLevels).W) - - /** fragmented_superpage support */ - val fragmented_superpage = Bool() - - /** homogeneous for both pma and pmp */ - val homogeneous = Bool() - val gpa = Valid(UInt(vaddrBits.W)) - val gpa_is_pte = Bool() -} - -/** IO between TLB and PTW - * - * PTW receives : - * - PTE request - * - CSRs info - * - pmp results from PMP(in TLB) - */ -class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { - val req = Decoupled(Valid(new PTWReq)) - val resp = Flipped(Valid(new PTWResp)) - val ptbr = Input(new PTBR()) - val hgatp = Input(new PTBR()) - val vsatp = Input(new PTBR()) - val status = Input(new MStatus()) - val hstatus = Input(new HStatus()) - val gstatus = Input(new MStatus()) - val pmp = Input(Vec(nPMPs, new PMP)) - val customCSRs = Flipped(coreParams.customCSRs) +object PTWParameter { + implicit def rwP: upickle.default.ReadWriter[PTWParameter] = upickle.default.macroRW[PTWParameter] } -/** PTW performance statistics */ -class PTWPerfEvents extends Bundle { - val l2miss = Bool() - val l2hit = Bool() - val pte_miss = Bool() - val pte_hit = Bool() -} - -/** Datapath IO between PTW and Core - * - * PTW receives CSRs info, pmp checks, sfence instruction info - * - * PTW sends its performance statistics to core - */ -class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { - val ptbr = Input(new PTBR()) - val hgatp = Input(new PTBR()) - val vsatp = Input(new PTBR()) - val sfence = Flipped(Valid(new SFenceReq)) - val status = Input(new MStatus()) - val hstatus = Input(new HStatus()) - val gstatus = Input(new MStatus()) - val pmp = Input(Vec(nPMPs, new PMP)) - val perf = Output(new PTWPerfEvents()) - val customCSRs = Flipped(coreParams.customCSRs) - - /** enable clock generated by ptw */ - val clock_enabled = Output(Bool()) -} - -/** PTE template for 
transmission - * - * contains useful methods to check PTE attributes - * @see RV-priv spec 4.3.1 for pgae table entry format - */ -class PTE(implicit p: Parameters) extends CoreBundle()(p) { - val reserved_for_future = UInt(10.W) - val ppn = UInt(44.W) - val reserved_for_software = Bits(2.W) - - /** dirty bit */ - val d = Bool() - - /** access bit */ - val a = Bool() - - /** global mapping */ - val g = Bool() - - /** user mode accessible */ - val u = Bool() - - /** whether the page is executable */ - val x = Bool() - - /** whether the page is writable */ - val w = Bool() - - /** whether the page is readable */ - val r = Bool() - - /** valid bit */ - val v = Bool() - - /** return true if find a pointer to next level page table */ - def table(dummy: Int = 0) = v && !r && !w && !x && !d && !a && !u && reserved_for_future === 0.U - - /** return true if find a leaf PTE */ - def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a - - /** user read */ - def ur(dummy: Int = 0) = sr() && u - - /** user write */ - def uw(dummy: Int = 0) = sw() && u - - /** user execute */ - def ux(dummy: Int = 0) = sx() && u - - /** supervisor read */ - def sr(dummy: Int = 0) = leaf() && r - - /** supervisor write */ - def sw(dummy: Int = 0) = leaf() && w && d - - /** supervisor execute */ - def sx(dummy: Int = 0) = leaf() && x - - /** full permission: writable and executable in user mode */ - def isFullPerm(dummy: Int = 0) = uw() && ux() +case class PTWParameter(useAsyncReset: Boolean, + hasClockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen: Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int, + ) extends SerializableModuleParameter { + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 + // TODO: configurable + def cacheBlockBytes = 64 + def lgCacheBlockBytes = log2Ceil(cacheBlockBytes) + def blockOffBits = lgCacheBlockBytes + def nL2TLBSets = nL2TLBEntries / 
nL2TLBWays + def idxBits: Int = if (nL2TLBSets == 0) 0 else log2Ceil(nL2TLBSets) + def untagBits: Int = blockOffBits + idxBits + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + def maxHypervisorExtraAddrBits: Int = 2 + def pgIdxBits: Int = 12 + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def dcacheReqTagBits: Int = 6 + def separateUncachedResp: Boolean = false + + // @todo make it true in the future. + def usingDTIM: Boolean = false + def dcacheArbPorts: Int = 1 + (if (usingVM) 1 else 0) + (if (usingDTIM) 1 else 0) + def coreDataBytes: Int = xLen max fLen + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + ( if(usingHypervisor) 1 else 0) else 0) + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + def vpnBits: Int = vaddrBits - pgIdxBits + def ppnBits: Int = paddrBits - pgIdxBits + def hypervisorExtraAddrBits: Int = { + if (usingHypervisor) maxHypervisorExtraAddrBits + else 0 + } + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = + if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def minPgLevels: Int = { + val res = xLen match { case 32 => 2; case 64 => 3 } + require(pgLevels >= res) + res + } + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + // I$ + D$ + def nPTWPorts: Int = 2 } -/** L2TLB PTE template - * - * contains tag bits - * @param nSets number of sets in L2TLB - * @see RV-priv spec 4.3.1 for page table entry format - */ -class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p) with HasCoreParameters { - val idxBits = log2Ceil(nSets) - val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) - val tag = UInt(tagBits.W) - val 
ppn = UInt(ppnBits.W) - - /** dirty bit */ - val d = Bool() - - /** access bit */ - val a = Bool() - - /** user mode accessible */ - val u = Bool() - - /** whether the page is executable */ - val x = Bool() - - /** whether the page is writable */ - val w = Bool() - - /** whether the page is readable */ - val r = Bool() - +class PTWInterface(parameter: PTWParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + + /** to n TLB */ + val requestor = Flipped( + Vec( + parameter.nPTWPorts, + new TLBPTWIO( + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.vaddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits + ) + ) + ) + + /** to HellaCache */ + val mem = new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) + + /** to Core + * + * contains CSRs info and performance statistics + */ + val dpath = new DatapathPTWIO( + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits, + parameter.vaddrBits, + parameter.asidBits, + parameter.nPMPs, + parameter.paddrBits + ) } /** PTW contains L2TLB, and performs page table walk for high level TLB, and cache queries from L1 TLBs(I$, D$, RoCC) @@ -268,28 +165,96 @@ class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p) wit * @see RV-priv spec 8.5 for Two-Stage Address Translation * @todo details in two-stage translation */ -class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) { - val io = IO(new Bundle { +@instantiable +class PTW(val parameter: PTWParameter) + extends FixedIORawModule(new PTWInterface(parameter)) + with SerializableModule[PTWParameter] + with ImplicitClock + with ImplicitReset { + 
override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val vpnBits: Int = parameter.vpnBits + val ppnBits: Int = parameter.ppnBits + val vaddrBits: Int = parameter.vaddrBits + val paddrBits: Int = parameter.paddrBits + val n: Int = parameter.nPTWPorts + val pgLevels: Int = parameter.pgLevels + val pgLevelBits: Int = parameter.pgLevelBits + val minPgLevels: Int = parameter.minPgLevels + val hypervisorExtraAddrBits: Int = parameter.hypervisorExtraAddrBits + val usingHypervisor: Boolean = parameter.usingHypervisor + val xLen: Int = parameter.xLen + val maxPAddrBits: Int = parameter.maxPAddrBits + val pgIdxBits: Int = parameter.pgIdxBits + val maxSVAddrBits: Int = parameter.maxSVAddrBits + val pmpGranularity: Int = parameter.pmpGranularity + val usingVM: Boolean = parameter.usingVM + val hasClockGate: Boolean = parameter.hasClockGate + val maxHypervisorExtraAddrBits: Int = parameter.maxHypervisorExtraAddrBits + def M_XRD = "b00000".U + + object coreParams { + val nPTECacheEntries: Int = parameter.nPTECacheEntries + val nL2TLBWays: Int = parameter.nL2TLBWays + val nL2TLBEntries: Int = parameter.nL2TLBEntries + } - /** to n TLB */ - val requestor = Flipped(Vec(n, new TLBPTWIO)) + def OptimizationBarrier[T <: Data](in: T): T = { + val barrier = Module(new Module { + val io = IO(new Bundle { + val x = Input(chiselTypeOf(in)) + val y = Output(chiselTypeOf(in)) + }) + io.y := io.x + override def desiredName = "OptimizationBarrier" + }) + barrier.io.x := in + barrier.io.y + } - /** to HellaCache */ - val mem = new HellaCacheIO + def additionalPgLevels(ptbr: PTBR) = ptbr.mode(log2Ceil(pgLevels - minPgLevels + 1) - 1, 0) - /** to Core - * - * contains CSRs info and performance statistics - */ - val dpath = new DatapathPTWIO - }) + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } + + + class PMPHomogeneityChecker(pmps: Seq[PMP]) { + def 
apply(addr: UInt, pgLevel: UInt): Bool = { + pmps + .foldLeft((true.B, 0.U.asTypeOf(chiselTypeOf(pmps.head)))) { + case ((h, prev), pmp) => + (h && PMP.homogeneous(pmp, addr, pgLevel, prev, paddrBits, pmpGranularity, pgLevels, pgIdxBits, pgLevelBits), pmp) + } + ._1 + } + } + + // compatiblity mode + object Split { + def apply(x: UInt, n0: Int) = { + val w = x.getWidth + (x(w-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n1), x(n1-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n2: Int, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0)) + } + } val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8) val state = RegInit(s_ready) val l2_refill_wire = Wire(Bool()) /** Arbiter to arbite request from n TLB */ - val arb = Module(new Arbiter(Valid(new PTWReq), n)) + val arb = Module(new Arbiter(Valid(new PTWReq(vpnBits)), n)) // use TLB req as arbitor's input arb.io.in <> io.requestor.map(_.req) // receive req only when s_ready and not in refill @@ -297,12 +262,13 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val resp_valid = RegNext(VecInit(Seq.fill(io.requestor.size)(false.B))) - val clock_en = - state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate + // val clock_en = + // state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate + val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid io.dpath.clock_enabled := usingVM.B && clock_en - val gated_clock = - if (!usingVM || !tileParams.dcache.get.clockGate) clock - else ClockGate(clock, clock_en, "ptw_clock_gate") + val gated_clock: Clock = + if (!usingVM || !hasClockGate) io.clock + else ClockGate(io.clock, clock_en) withClock(gated_clock) { // entering 
gated-clock domain val invalidated = Reg(Bool()) @@ -325,27 +291,29 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val resp_fragmented_superpage = Reg(Bool()) /** tlb request */ - val r_req = Reg(new PTWReq) + val r_req = Reg(new PTWReq(vpnBits)) /** current selected way in arbitor */ val r_req_dest = Reg(Bits()) // to respond to L1TLB : l2_hit // to construct mem.req.addr val r_pte = Reg(new PTE) - val r_hgatp = Reg(new PTBR) + val r_hgatp = Reg(new PTBR(xLen, maxPAddrBits, pgIdxBits)) // 2-stage pageLevel val aux_count = Reg(UInt(log2Ceil(pgLevels).W)) /** pte for 2-stage translation */ val aux_pte = Reg(new PTE) - val aux_ppn_hi = (pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth) - .option(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W))) + val aux_ppn_hi = Option.when(pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth)( + Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W)) + ) val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case val stage2 = Reg(Bool()) val stage2_final = Reg(Bool()) val satp = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp, io.dpath.ptbr) - val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - r_hgatp.additionalPgLevels + + val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(r_hgatp) /** 2-stage translation both enable */ val do_both_stages = r_req.vstage1 && r_req.stage2 @@ -363,7 +331,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } } // construct pte from mem.resp - val (pte, invalid_paddr) = { + val (pte: PTE, invalid_paddr: Bool) = { val tmp = mem_resp_data.asTypeOf(new PTE()) val res = WireDefault(tmp) res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth) - 1, 0), tmp.ppn(ppnBits - 1, 0)) @@ -377,23 +345,25 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, 
(tmp.ppn >> ppnBits) =/= 0.U)) } // find non-leaf PTE, need traverse - val traverse = pte.table() && !invalid_paddr && count < (pgLevels - 1).U + def table(pte: PTE) = + pte.v && !pte.r && !pte.w && !pte.x && !pte.d && !pte.a && !pte.u && pte.reserved_for_future === 0.U + val traverse = table(pte) && !invalid_paddr && count < (pgLevels - 1).U /** address send to mem for enquerry */ val pte_addr = if (!usingVM) 0.U else { - val vpn_idxs = (0 until pgLevels).map { i => + val vpn_idxs = VecInit((0 until pgLevels).map { i => val width = pgLevelBits + (if (i <= pgLevels - minPgLevels) hypervisorExtraAddrBits else 0) (vpn >> (pgLevels - i - 1) * pgLevelBits)(width - 1, 0) - } + }) val mask = Mux( stage2 && count === r_hgatp_initial_count, ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U, ((1 << pgLevelBits) - 1).U ) val vpn_idx = vpn_idxs(count) & mask - val raw_pte_addr = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8) + val raw_pte_addr: UInt = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8) val size = if (usingHypervisor) vaddrBits else paddrBits //use r_pte.ppn as page table base address //use vpn slice as offset @@ -404,13 +374,13 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val pte_cache_addr = if (!usingHypervisor) pte_addr else { - val vpn_idxs = (0 until pgLevels - 1).map { i => + val vpn_idxs = VecInit((0 until pgLevels - 1).map { i => val ext_aux_pte_ppn = aux_ppn_hi match { case None => aux_pte.ppn case Some(hi) => Cat(hi, aux_pte.ppn) } (ext_aux_pte_ppn >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0) - } + }) val vpn_idx = vpn_idxs(count) val raw_pte_cache_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen / 8) raw_pte_cache_addr(vaddrBits.min(raw_pte_cache_addr.getWidth) - 1, 0) @@ -420,17 +390,22 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val stage2_pte_cache_addr = if (!usingHypervisor) 0.U else { - val vpn_idxs = (0 until pgLevels - 
1).map { i => + val vpn_idxs = VecInit((0 until pgLevels - 1).map { i => (r_req.addr >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0) - } + }) val vpn_idx = vpn_idxs(aux_count) val raw_s2_pte_cache_addr = Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8) raw_s2_pte_cache_addr(vaddrBits.min(raw_s2_pte_cache_addr.getWidth) - 1, 0) } def makeFragmentedSuperpagePPN(ppn: UInt): Seq[UInt] = { + def padTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(0.U((n - x.getWidth).W), x) + } (pgLevels - 1 until 0 by -1).map(i => - Cat(ppn >> (pgLevelBits * i), r_req.addr(((pgLevelBits * i).min(vpnBits)) - 1, 0).padTo(pgLevelBits * i)) + Cat(ppn >> (pgLevelBits * i), padTo(r_req.addr(((pgLevelBits * i).min(vpnBits)) - 1, 0), (pgLevelBits * i))) ) } @@ -452,11 +427,12 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val can_refill = if (s2) do_both_stages && !stage2 && !stage2_final else can_hit + val tag = - if (s2) Cat(true.B, stage2_pte_cache_addr.padTo(vaddrBits)) - else Cat(r_req.vstage1, pte_cache_addr.padTo(if (usingHypervisor) vaddrBits else paddrBits)) + if (s2) Cat(true.B, padTo(stage2_pte_cache_addr, vaddrBits)) + else Cat(r_req.vstage1, padTo(pte_cache_addr, if (usingHypervisor) vaddrBits else paddrBits)) - val hits = tags.map(_ === tag).asUInt & valid + val hits = VecInit(tags.map(_ === tag)).asUInt & valid val hit = hits.orR && can_hit // refill with mem response when(mem_resp_valid && traverse && can_refill && !hits.orR && !invalidated) { @@ -473,9 +449,9 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } val lcount = if (s2) aux_count else count - for (i <- 0 until pgLevels - 1) { - ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i") - } + // for (i <- 0 until pgLevels - 1) { + // ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i") + // } (hit, Mux1H(hits, 
data)) } @@ -509,12 +485,12 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val idxBits = log2Ceil(nL2TLBSets) val l2_plru = new SetAssocLRU(nL2TLBSets, coreParams.nL2TLBWays, "plru") - - val ram = DescribedSRAM( - name = "l2_tlb_ram", - desc = "L2 TLB", + val ram: SRAMInterface[Vec[UInt]] = SRAM.masked( size = nL2TLBSets, - data = Vec(coreParams.nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets).getWidth).W)) + tpe = Vec(coreParams.nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor).getWidth).W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 ) val g = Reg(Vec(coreParams.nL2TLBWays, UInt(nL2TLBSets.W))) @@ -523,37 +499,34 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits - pgIdxBits - 1, 0)), idxBits) /** the valid vec for the selected set(including n ways) */ - val r_valid_vec = valid.map(_(r_idx)).asUInt + val r_valid_vec = VecInit(valid.map(_(r_idx))).asUInt val r_valid_vec_q = Reg(UInt(coreParams.nL2TLBWays.W)) val r_l2_plru_way = Reg(UInt(log2Ceil(coreParams.nL2TLBWays.max(1)).W)) r_valid_vec_q := r_valid_vec // replacement way r_l2_plru_way := (if (coreParams.nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U) // refill with r_pte(leaf pte) - when(l2_refill && !invalidated) { - val entry = Wire(new L2TLBEntry(nL2TLBSets)) - entry.ppn := r_pte.ppn - entry.d := r_pte.d - entry.a := r_pte.a - entry.u := r_pte.u - entry.x := r_pte.x - entry.w := r_pte.w - entry.r := r_pte.r - entry.tag := r_tag - // if all the way are valid, use plru to select one way to be replaced, - // otherwise use PriorityEncoderOH to select one - val wmask = - if (coreParams.nL2TLBWays > 1) - Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) - else 1.U(1.W) - ram.write(r_idx, 
VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools) - - val mask = UIntToOH(r_idx) - for (way <- 0 until coreParams.nL2TLBWays) { - when(wmask(way)) { - valid(way) := valid(way) | mask - g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask) - } + val entry = Wire(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor)) + entry.ppn := r_pte.ppn + entry.d := r_pte.d + entry.a := r_pte.a + entry.u := r_pte.u + entry.x := r_pte.x + entry.w := r_pte.w + entry.r := r_pte.r + entry.tag := r_tag + // if all the way are valid, use plru to select one way to be replaced, + // otherwise use PriorityEncoderOH to select one + val wmask = + if (coreParams.nL2TLBWays > 1) + Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) + else 1.U(1.W) + + val mask = UIntToOH(r_idx) + for (way <- 0 until coreParams.nL2TLBWays) { + when(wmask(way)) { + valid(way) := valid(way) | mask + g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask) } } // sfence happens @@ -574,18 +547,26 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid) val s2_valid = RegNext(s1_valid) // read from tlb idx - val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits - 1, 0), s0_valid) + val s1_rdata = ram.readwritePorts.head.readData val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid))) val s2_valid_vec = RegEnable(r_valid_vec, s1_valid) val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid) - val s2_error = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR + val s2_error = VecInit((0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error)).asUInt.orR when(s2_valid && s2_error) { valid.foreach { _ := 0.U } } + // ram connect + ram.readwritePorts.foreach { ramPort => + ramPort.enable := (l2_refill && 
!invalidated) || s0_valid + ramPort.isWrite := (l2_refill && !invalidated) + ramPort.address := Mux(l2_refill && !invalidated, r_idx, arb.io.out.bits.bits.addr(idxBits - 1, 0)) + ramPort.writeData := VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))) + ramPort.mask.foreach(_ := VecInit(wmask.asBools)) + } // decode - val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets))) + val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets, ppnBits, maxSVAddrBits, pgIdxBits, usingHypervisor))) val s2_hit_vec = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag)) - val s2_hit = s2_valid && s2_hit_vec.orR - io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR) + val s2_hit = s2_valid && VecInit(s2_hit_vec).asUInt.orR + io.dpath.perf.l2miss := s2_valid && !(VecInit(s2_hit_vec).asUInt.orR) io.dpath.perf.l2hit := s2_hit when(s2_hit) { l2_plru.access(r_idx, OHToUInt(s2_hit_vec)) @@ -606,9 +587,9 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( s2_pte.reserved_for_future := 0.U s2_pte.reserved_for_software := 0.U - for (way <- 0 until coreParams.nL2TLBWays) { - ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way") - } +// for (way <- 0 until coreParams.nL2TLBWays) { +// ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way") +// } (s2_hit, s2_error, s2_pte, Some(ram)) } @@ -640,19 +621,21 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits) require(!usingHypervisor || pageGranularityPMPs, s"hypervisor requires pmpGranularity >= ${1 << pgIdxBits}") - val pmaPgLevelHomogeneous = (0 until pgLevels).map { i => + val pmaPgLevelHomogeneous: Seq[Bool] = (0 until pgLevels).map { i => val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)) if (pageGranularityPMPs && i == pgLevels - 1) 
{ require( - TLBPageLookup.homogeneous(edge.manager.managers, pgSize), +// TLBPageLookup.homogeneous(edge.manager.managers, pgSize), + true, s"All memory regions must be $pgSize-byte aligned" ) true.B } else { - TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous +// TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous + true.B } } - val pmaHomogeneous = pmaPgLevelHomogeneous(count) + val pmaHomogeneous = VecInit(pmaPgLevelHomogeneous)(count) val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp).apply(r_pte.ppn << pgIdxBits, count) val homogeneous = pmaHomogeneous && pmpHomogeneous // response to tlb @@ -675,7 +658,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( Mux( !stage2_final || !r_req.vstage1 || aux_count === (pgLevels - 1).U, aux_pte.ppn, - makeFragmentedSuperpagePPN(aux_pte.ppn)(aux_count) + VecInit(makeFragmentedSuperpagePPN(aux_pte.ppn))(aux_count) ), gpa_pgoff ) @@ -683,7 +666,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( io.requestor(i).ptbr := io.dpath.ptbr io.requestor(i).hgatp := io.dpath.hgatp io.requestor(i).vsatp := io.dpath.vsatp - io.requestor(i).customCSRs <> io.dpath.customCSRs +// io.requestor(i).customCSRs <> io.dpath.customCSRs io.requestor(i).status := io.dpath.status io.requestor(i).hstatus := io.dpath.hstatus io.requestor(i).gstatus := io.dpath.gstatus @@ -698,9 +681,9 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( switch(state) { is(s_ready) { when(arb.io.out.fire) { - val satp_initial_count = pgLevels.U - minPgLevels.U - satp.additionalPgLevels - val vsatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.vsatp.additionalPgLevels - val hgatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.hgatp.additionalPgLevels + val satp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(satp) + val 
vsatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(io.dpath.vsatp) + val hgatp_initial_count = pgLevels.U - minPgLevels.U - additionalPgLevels(io.dpath.hgatp) val aux_ppn = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp.ppn, arb.io.out.bits.bits.addr) r_req := arb.io.out.bits.bits @@ -728,7 +711,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } is(s_req) { when(stage2 && count === r_hgatp_initial_count) { - gpa_pgoff := Mux(aux_count === (pgLevels - 1).U, r_req.addr << (xLen / 8).log2, stage2_pte_cache_addr) + gpa_pgoff := Mux(aux_count === (pgLevels - 1).U, r_req.addr << log2Ceil(xLen / 8), stage2_pte_cache_addr) } // pte_cache hit when(stage2_pte_cache_hit) { @@ -774,14 +757,14 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val superpage_masks = (0 until pgLevels).map(i => ((BigInt(1) << pte.ppn.getWidth) - (BigInt(1) << (pgLevels - 1 - i) * pgLevelBits)).U ) - val superpage_mask = superpage_masks(Mux(stage2_final, max_count, (pgLevels - 1).U)) + val superpage_mask = VecInit(superpage_masks)(Mux(stage2_final, max_count, (pgLevels - 1).U)) val stage1_ppns = (0 until pgLevels - 1).map(i => Cat( pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits), aux_pte.ppn((pgLevels - i - 1) * pgLevelBits - 1, 0) ) ) :+ pte.ppn - val stage1_ppn = stage1_ppns(count) + val stage1_ppn = VecInit(stage1_ppns)(count) makePTE(stage1_ppn & superpage_mask, aux_pte) } @@ -809,7 +792,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( // fragment_superpage Mux( state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U, - makePTE(makeFragmentedSuperpagePPN(r_pte.ppn)(count), r_pte), + makePTE(VecInit(makeFragmentedSuperpagePPN(r_pte.ppn))(count), r_pte), // when tlb request come->request mem, use root address in satp(or vsatp,hgatp) Mux( arb.io.out.fire, @@ -841,7 +824,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) 
extends CoreModule()( when(do_both_stages && !stage2) { do_switch := true.B } count := count + 1.U }.otherwise { - val gf = stage2 && !stage2_final && !pte.ur() + val gf = stage2 && !stage2_final && !PTE.ur(pte) val ae = pte.v && invalid_paddr val pf = pte.v && pte.reserved_for_future =/= 0.U val success = pte.v && !ae && !pf && !gf @@ -858,7 +841,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( // find a leaf pte, start l2 refill l2_refill := success && count === (pgLevels - 1).U && !r_req.need_gpa && (!r_req.vstage1 && !r_req.stage2 || - do_both_stages && aux_count === (pgLevels - 1).U && pte.isFullPerm()) + do_both_stages && aux_count === (pgLevels - 1).U && PTE.isFullPerm(pte)) count := max_count when( @@ -870,13 +853,13 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( resp_valid(r_req_dest) := true.B } - resp_ae_ptw := ae && count < (pgLevels - 1).U && pte.table() + resp_ae_ptw := ae && count < (pgLevels - 1).U && PTE.table(pte) resp_ae_final := ae resp_pf := pf && !stage2 resp_gf := gf || (pf && stage2) - resp_hr := !stage2 || (!pf && !gf && pte.ur()) - resp_hw := !stage2 || (!pf && !gf && pte.uw()) - resp_hx := !stage2 || (!pf && !gf && pte.ux()) + resp_hr := !stage2 || (!pf && !gf && PTE.ur(pte)) + resp_hw := !stage2 || (!pf && !gf && PTE.uw(pte)) + resp_hx := !stage2 || (!pf && !gf && PTE.ux(pte)) } } } @@ -891,43 +874,26 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( aux_pte := Mux( traverse, pte, { - val s1_ppns = (0 until pgLevels - 1).map(i => - Cat( - pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits), - r_req - .addr((((pgLevels - i - 1) * pgLevelBits).min(vpnBits)) - 1, 0) - .padTo((pgLevels - i - 1) * pgLevelBits) - ) - ) :+ pte.ppn + val s1_ppns = VecInit( + (0 until pgLevels - 1).map(i => + Cat( + pte.ppn(pte.ppn.getWidth - 1, (pgLevels - i - 1) * pgLevelBits), + padTo( + r_req + .addr((((pgLevels - i - 1) * 
pgLevelBits).min(vpnBits)) - 1, 0), + ((pgLevels - i - 1) * pgLevelBits) + ) + ) + ) :+ pte.ppn + ) makePTE(s1_ppns(count), pte) } ) aux_ppn_hi.foreach { _ := 0.U } stage2 := true.B } - - for (i <- 0 until pgLevels) { - val leaf = mem_resp_valid && !traverse && count === i.U - ccover( - leaf && pte.v && !invalid_paddr && pte.reserved_for_future === 0.U, - s"L$i", - s"successful page-table access, level $i" - ) - ccover(leaf && pte.v && invalid_paddr, s"L${i}_BAD_PPN_MSB", s"PPN too large, level $i") - ccover(leaf && pte.v && pte.reserved_for_future =/= 0.U, s"L${i}_BAD_RSV_MSB", s"reserved MSBs set, level $i") - ccover(leaf && !mem_resp_data(0), s"L${i}_INVALID_PTE", s"page not present, level $i") - if (i != pgLevels - 1) - ccover(leaf && !pte.v && mem_resp_data(0), s"L${i}_BAD_PPN_LSB", s"PPN LSBs not zero, level $i") - } - ccover(mem_resp_valid && count === (pgLevels - 1).U && pte.table(), s"TOO_DEEP", s"page table too deep") - ccover(io.mem.s2_nack, "NACK", "D$ nacked page-table access") - ccover(state === s_wait2 && io.mem.s2_xcpt.ae.ld, "AE", "access exception while walking page table") - } // leaving gated-clock domain - private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc) - /** Relace PTE.ppn with ppn */ private def makePTE(ppn: UInt, default: PTE) = { val pte = WireDefault(default) @@ -937,28 +903,12 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( /** use hgatp and vpn to construct a new ppn */ private def makeHypervisorRootPTE(hgatp: PTBR, vpn: UInt, default: PTE) = { - val count = pgLevels.U - minPgLevels.U - hgatp.additionalPgLevels - val idxs = (0 to pgLevels - minPgLevels).map(i => (vpn >> (pgLevels - i) * pgLevelBits)) + val count = pgLevels.U - minPgLevels.U - additionalPgLevels(hgatp) + val idxs = VecInit((0 to pgLevels - minPgLevels).map(i => (vpn >> (pgLevels - i) * pgLevelBits))) val lsbs = 
WireDefault(UInt(maxHypervisorExtraAddrBits.W), idxs(count)) val pte = WireDefault(default) pte.ppn := Cat(hgatp.ppn >> maxHypervisorExtraAddrBits, lsbs) pte } -} -/** Mix-ins for constructing tiles that might have a PTW */ -trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile => - val module: CanHavePTWModule - var nPTWPorts = 1 - nDCachePorts += usingPTW.toInt -} - -trait CanHavePTWModule extends HasHellaCacheModule { - val outer: CanHavePTW - val ptwPorts = ListBuffer(outer.dcache.module.io.ptw) - val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p)) - ptw.io.mem <> DontCare - if (outer.usingPTW) { - dcachePorts += ptw.io.mem - } } From c2f9b49f3c09b6a494af50953ef59dfa7913ffb7 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 30 Jun 2024 18:05:15 +0800 Subject: [PATCH 060/140] [rocketv] add elaborator for PTW - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PTW config --useAsyncReset false --hasClockGate true --usingVM false --usingHypervisor false --xLen 32 --fLen 32 --paddrBits 32 --asidBits 0 --pgLevels 2 --nPTECacheEntries 8 --nL2TLBWays 1 --nL2TLBEntries 0 --nPMPs 9 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.PTW design --parameter ./PTW.json --run-firtool --- elaborator/src/rocketv/PTW.scala | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 elaborator/src/rocketv/PTW.scala diff --git a/elaborator/src/rocketv/PTW.scala b/elaborator/src/rocketv/PTW.scala new file mode 100644 index 000000000..4a20e110f --- /dev/null +++ b/elaborator/src/rocketv/PTW.scala @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{PTW, PTWParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object PTW extends Elaborator { + @main + case class 
PTWParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "hasClockGate") hasClockGate: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingHypervisor") usingHypervisor: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = "nPTECacheEntries") nPTECacheEntries: Int, + @arg(name = "nL2TLBWays") nL2TLBWays: Int, + @arg(name = "nL2TLBEntries") nL2TLBEntries: Int, + @arg(name = "nPMPs") nPMPs: Int) { + def convert: PTWParameter = PTWParameter( + useAsyncReset, + hasClockGate, + usingVM, + usingHypervisor, + xLen, + fLen, + paddrBits, + asidBits, + pgLevels, + nPTECacheEntries, + nL2TLBWays, + nL2TLBEntries, + nPMPs + ) + } + + implicit def PTWParameterMainParser: ParserForClass[PTWParameterMain] = ParserForClass[PTWParameterMain] + + @main + def config(@arg(name = "parameter") parameter: PTWParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[PTW, PTWParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From acbf677909fcd6e5ae853e5c6e7eaee5c8d17685 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Fri, 28 Jun 2024 13:25:01 +0800 Subject: [PATCH 061/140] [rocketv] copy ICache into rocketv project --- rocketv/src/ICache.scala | 970 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 970 insertions(+) create mode 100644 rocketv/src/ICache.scala diff --git a/rocketv/src/ICache.scala b/rocketv/src/ICache.scala new file mode 100644 index 000000000..08b5812ff --- /dev/null +++ b/rocketv/src/ICache.scala @@ -0,0 +1,970 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util.{isPow2, log2Ceil, log2Up, Cat, Decoupled, Mux1H, OHToUInt, PopCount, RegEnable, Valid} +import freechips.rocketchip.amba._ +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util.{DescribedSRAM, _} +import freechips.rocketchip.util.property +import chisel3.experimental.SourceInfo +import chisel3.dontTouch +import chisel3.util.random.LFSR +// TODO: get rid of it. +import freechips.rocketchip.rocket.ICacheParams + +trait HasL1ICacheParameters extends HasL1CacheParameters with HasCoreParameters { + val cacheParams = tileParams.icache.get +} + +class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters { + val addr = UInt(vaddrBits.W) +} + +class ICacheErrors(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters with CanHaveErrors { + val correctable = (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) + val uncorrectable = (cacheParams.itimAddr.nonEmpty && cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) + val bus = Valid(UInt(paddrBits.W)) +} + +/** [[ICache]] is a set associated cache I$(Instruction Cache) of Rocket. 
+ * {{{ + * Keywords: Set-associated + * 3 stage pipeline + * Virtually-Indexed Physically-Tagged (VIPT) + * Parallel access to tag and data SRAM + * Random replacement algorithm + * Optional Features: + * Prefetch + * ECC + * Instruction Tightly Integrated Memory(ITIM)}}} + * {{{ + * PipeLine: + * Stage 0 : access data and tag SRAM in parallel + * Stage 1 : receive paddr from CPU + * compare tag and paddr when the entry is valid + * if hit : pick up the target instruction + * if miss : start refilling in stage 2 + * Stage 2 : respond to CPU or start a refill}}} + * {{{ + * Note: Page size = 4KB thus paddr[11:0] = vaddr[11:0] + * considering sets = 64, cachelineBytes =64 + * use vaddr[11:6] to access tag_array + * use vaddr[11:2] to access data_array}}} + * {{{ + * ITIM: + * │ tag │ set │offset│ + * ├way┘ → indicate way location + * │ line │ }}} + * if `way` == b11 (last way), deallocate + * if write to ITIM all I$ will be invalidate + * + * The optional dynamic configurable ITIM sharing SRAM with I$ is set by [[icacheParams.itimAddr]]. + * if PutFullData/PutPartialData to the ITIM address, it will dynamically allocate base address to the address of this accessing from SRAM. + * if access to last way of ITIM, it set will change back to I$. + * + * If ITIM is configured: + * set: if address to access is not to be configured to ITIM yet, + * a memory accessing to ITIM address range will modify `scratchpadMax`, + * from ITIM base to `scratchpadMax` will be used as ITIM. + * unset: @todo + * + * There will always be one way(the last way) used for I$, which cannot be allocated to ITIM. + * + * @param icacheParams parameter to this I$. + */ +class ICache(val icacheParams: ICacheParams)(implicit p: Parameters) + extends LazyModule { + lazy val module = new ICacheModule(this) + + /** Diplomatic hartid bundle used for ITIM. */ + val hartIdSinkNodeOpt = icacheParams.itimAddr.map(_ => BundleBridgeSink[UInt]()) + + /** @todo base address offset for ITIM? 
*/ + val mmioAddressPrefixSinkNodeOpt = icacheParams.itimAddr.map(_ => BundleBridgeSink[UInt]()) + + /** Rocket configuration has virtual memory. + * + * This only affect [[masterNode]] AMBA ports only: + * AMBA privileged, secure will be set as true while others set as false. + * see [[freechips.rocketchip.amba.AMBAProt]] for more informations. + */ + val useVM = p(TileKey).core.useVM + + /** [[TLClientNode]] of I$. + * + * source Id range: + * 0: use [[TLEdgeOut.Get]] to get instruction. + * 1: use [[TLEdgeOut.Hint]] to hint next level memory device fetching next cache line, if configured [[icacheParams.prefetch]]. + * + * @todo why if no [[useVM]], will have AMBAProtField in requestFields? + */ + val masterNode = TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = Seq( + TLMasterParameters.v1( + sourceId = IdRange(0, 1 + icacheParams.prefetch.toInt), // 0=refill, 1=hint + name = s"ICache" + ) + ), + requestFields = useVM.option(Seq()).getOrElse(Seq(AMBAProtField())) + ) + ) + ) + + /** size of [[ICache]], count in byte. */ + val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes + + /** last way will be configured to control offest, access it will deallocate an entire set to I$. 
*/ + val itim_control_offset = size - icacheParams.nSets * icacheParams.blockBytes + + val device = new SimpleDevice("itim", Seq("sifive,itim0")) { + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + val Seq(Binding(_, ResourceAddress(address, perms))) = resources("reg/mem") + val base_address = address.head.base + val mem_part = AddressSet.misaligned(base_address, itim_control_offset) + val control_part = AddressSet.misaligned(base_address + itim_control_offset, size - itim_control_offset) + val extra = Map( + "reg-names" -> Seq(ResourceString("mem"), ResourceString("control")), + "reg" -> Seq(ResourceAddress(mem_part, perms), ResourceAddress(control_part, perms)) + ) + Description(name, mapping ++ extra) + } + } + + def itimProperty: Option[Seq[ResourceValue]] = icacheParams.itimAddr.map(_ => device.asProperty) + + /** @todo why [[wordBytes]] is defined by [[icacheParams.fetchBytes]], rather than 32 directly? */ + private val wordBytes = icacheParams.fetchBytes + + /** Instruction Tightly Integrated Memory node. */ + val slaveNode = + TLManagerNode(icacheParams.itimAddr.toSeq.map { itimAddr => + TLSlavePortParameters.v1( + Seq( + TLSlaveParameters.v1( + address = Seq(AddressSet(itimAddr, size - 1)), + resources = device.reg("mem"), + regionType = RegionType.IDEMPOTENT, + executable = true, + supportsPutFull = TransferSizes(1, wordBytes), + supportsPutPartial = TransferSizes(1, wordBytes), + supportsGet = TransferSizes(1, wordBytes), + fifoId = Some(0) + ) + ), // requests handled in FIFO order + beatBytes = wordBytes, + minLatency = 1 + ) + }) +} + +class ICacheResp(outer: ICache) extends Bundle { + + /** data to CPU. */ + val data = UInt((outer.icacheParams.fetchBytes * 8).W) + + /** ask CPU to replay fetch when tag or data ECC error happened. */ + val replay = Bool() + + /** access exception: + * indicate CPU an tag ECC error happened. 
+ * if [[outer.icacheParams.latency]] is 1, tie 0. + */ + val ae = Bool() + +} + +class ICachePerfEvents extends Bundle { + val acquire = Bool() +} + +/** IO from CPU to ICache. */ +class ICacheBundle(val outer: ICache) extends CoreBundle()(outer.p) { + + /** first cycle requested from CPU. */ + val req = Flipped(Decoupled(new ICacheReq)) + val s1_paddr = Input(UInt(paddrBits.W)) // delayed one cycle w.r.t. req + val s2_vaddr = Input(UInt(vaddrBits.W)) // delayed two cycles w.r.t. req + val s1_kill = Input(Bool()) // delayed one cycle w.r.t. req + val s2_kill = Input(Bool()) // delayed two cycles; prevents I$ miss emission + val s2_cacheable = Input(Bool()) // should L2 cache line on a miss? + val s2_prefetch = Input(Bool()) // should I$ prefetch next line on a miss? + /** response to CPU. */ + val resp = Valid(new ICacheResp(outer)) + + /** flush L1 cache from CPU. + * TODO: IIRC, SFENCE.I + */ + val invalidate = Input(Bool()) + + /** I$ has error, notify to bus. + * TODO: send to BPU. + */ + val errors = new ICacheErrors + + /** for performance counting. */ + val perf = Output(new ICachePerfEvents()) + + /** enable clock. */ + val clock_enabled = Input(Bool()) + + /** I$ miss or ITIM access will still enable clock even [[ICache]] is asked to be gated. */ + val keep_clock_enabled = Output(Bool()) +} + +class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheParameters { + override val cacheParams = outer.icacheParams // Use the local parameters + + /** IO between Core and ICache. */ + val io = IO(new ICacheBundle(outer)) + + /** TileLink port to memory. */ + val (tl_out, edge_out) = outer.masterNode.out(0) + + /** TileLink port as ITIM memory. + * if [[outer.slaveNode]] is not connected [[outer.slaveNode.in]] will be empty. 
+ * + * wes: Option.unzip does not exist :-( + */ + val (tl_in, edge_in) = outer.slaveNode.in.headOption.unzip + + val tECC = cacheParams.tagCode + val dECC = cacheParams.dataCode + + require(isPow2(nSets) && isPow2(nWays)) + require( + !usingVM || outer.icacheParams.itimAddr.isEmpty || pgIdxBits >= untagBits, + s"When VM and ITIM are enabled, I$$ set size must not exceed ${1 << (pgIdxBits - 10)} KiB; got ${(outer.size / nWays) >> 10} KiB" + ) + + /** if this ICache can be used as ITIM, which hart it belongs to. */ + val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle) + + /** @todo tile Memory mapping I/O base address? */ + val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle) + + /** register indicates wheather ITIM is enabled. */ + val scratchpadOn = RegInit(false.B) + + /** a cut point to SRAM, indicates which SRAM will be used as SRAM or Cache. */ + val scratchpadMax = tl_in.map(tl => Reg(UInt(log2Ceil(nSets * (nWays - 1)).W))) + + /** Check if a line is in the scratchpad. + * + * line is a minimal granularity accessing to SRAM, calculated by [[scratchpadLine]] + */ + def lineInScratchpad(line: UInt) = scratchpadMax.map(scratchpadOn && line <= _).getOrElse(false.B) + + /** scratchpad base address, if exist [[ICacheParams.itimAddr]], add [[ReplicatedRegion]] to base. + * @todo seem [[io_hartid]] is not connected? + * maybe when implementing itim, LookupByHartId should be changed to [[]]? + */ + val scratchpadBase = outer.icacheParams.itimAddr.map { dummy => + p(LookupByHartId)(_.icache.flatMap(_.itimAddr.map(_.U)), io_hartid.get) | io_mmio_address_prefix.get + } + + /** check an address in the scratchpad address range. */ + def addrMaybeInScratchpad(addr: UInt) = + scratchpadBase.map(base => addr >= base && addr < base + outer.size.U).getOrElse(false.B) + + /** check property this address(paddr) exists in scratchpad. 
+ * @todo seems duplicated in `addrMaybeInScratchpad(addr)` between `lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))`? + */ + def addrInScratchpad(addr: UInt) = + addrMaybeInScratchpad(addr) && lineInScratchpad(addr(untagBits + log2Ceil(nWays) - 1, blockOffBits)) + + /** return the way which will be used as scratchpad for accessing address + * {{{ + * │ tag │ set │offset│ + * └way┘ + * }}} + * @param addr address to be found. + */ + def scratchpadWay(addr: UInt) = addr.extract(untagBits + log2Ceil(nWays) - 1, untagBits) + + /** check if the selected way is legal. + * note: the last way should be reserved to ICache. + */ + def scratchpadWayValid(way: UInt) = way < (nWays - 1).U + + /** return the cacheline which will be used as scratchpad for accessing address + * {{{ + * │ tag │ set │offset│ + * ├way┘ → indicate way location + * │ line │ + * }}} + * @param addr address to be found. + * applied to slave_addr + */ + def scratchpadLine(addr: UInt) = addr(untagBits + log2Ceil(nWays) - 1, blockOffBits) + + /** scratchpad access valid in stage N */ + val s0_slaveValid = tl_in.map(_.a.fire).getOrElse(false.B) + val s1_slaveValid = RegNext(s0_slaveValid, false.B) + val s2_slaveValid = RegNext(s1_slaveValid, false.B) + val s3_slaveValid = RegNext(false.B) + + /** valid signal for CPU accessing cache in stage 0. */ + val s0_valid = io.req.fire + + /** virtual address from CPU in stage 0. */ + val s0_vaddr = io.req.bits.addr + + /** valid signal for stage 1, drived by s0_valid. */ + val s1_valid = RegInit(false.B) + + /** virtual address from CPU in stage 1. */ + val s1_vaddr = RegEnable(s0_vaddr, s0_valid) + + /** tag hit vector to indicate hit which way. */ + val s1_tag_hit = Wire(Vec(nWays, Bool())) + + /** CPU I$ Hit in stage 1. 
+ * + * @note + * for logic in `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`, + * there are two different types based on latency: + * + * if latency is 1: `s1_slaveValid === false.B` and `addrMaybeInScratchpad(io.s1_paddr) === false.B` , + * since in this case, ITIM must be empty. + * + * if latency is 2: if `s1_slaveValid` is true, this SRAM accessing is coming from [[tl_in]], so it will hit. + * if `s1_slaveValid` is false, but CPU is accessing memory range in scratchpad address, it will hit by default. + * Hardware won't guarantee this access will access to a data which have been written in ITIM. + * + * @todo seem CPU access are both processed by `s1_tag_hit` and `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`? + */ + val s1_hit = s1_tag_hit.reduce(_ || _) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr)) + dontTouch(s1_hit) + val s2_valid = RegNext(s1_valid && !io.s1_kill, false.B) + val s2_hit = RegNext(s1_hit) + + /** status register to indicate a cache flush. */ + val invalidated = Reg(Bool()) + val refill_valid = RegInit(false.B) + + /** register to indicate [[tl_out]] is performing a hint. + * prefetch only happens after refilling + */ + val send_hint = RegInit(false.B) + + /** indicate [[tl_out]] is performing a refill. */ + val refill_fire = tl_out.a.fire && !send_hint + + /** register to indicate there is a outstanding hint. */ + val hint_outstanding = RegInit(false.B) + + /** [[io]] access L1 I$ miss. */ + val s2_miss = s2_valid && !s2_hit && !io.s2_kill + + /** forward signal to stage 1, permit stage 1 refill. */ + val s1_can_request_refill = !(s2_miss || refill_valid) + + /** real refill signal, stage 2 miss, and was permit to refill in stage 1. + * Since a miss will trigger burst. + * miss under miss won't trigger another burst. 
+ */ + val s2_request_refill = s2_miss && RegNext(s1_can_request_refill) + val refill_paddr = RegEnable(io.s1_paddr, s1_valid && s1_can_request_refill) + val refill_vaddr = RegEnable(s1_vaddr, s1_valid && s1_can_request_refill) + val refill_tag = refill_paddr >> pgUntagBits + val refill_idx = index(refill_vaddr, refill_paddr) + + /** AccessAckData, is refilling I$, it will block request from CPU. */ + val refill_one_beat = tl_out.d.fire && edge_out.hasData(tl_out.d.bits) + + /** block request from CPU when refill or scratch pad access. */ + io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid) + s1_valid := s0_valid + + val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d) + + /** at last beat of `tl_out.d.fire`, finish refill. */ + val refill_done = refill_one_beat && d_done + + /** scratchpad is writing data. block refill. */ + tl_out.d.ready := !s3_slaveValid + require(edge_out.manager.minLatency > 0) + + /** way to be replaced, implemented with a hardcoded random replacement algorithm */ + val repl_way = + if (isDM) 0.U + else { + // pick a way that is not used by the scratchpad + val v0 = LFSR(16, refill_fire)(log2Up(nWays) - 1, 0) + var v = v0 + for (i <- log2Ceil(nWays) - 1 to 0 by -1) { + val mask = nWays - (BigInt(1) << (i + 1)) + v = v | (lineInScratchpad(Cat(v0 | mask.U, refill_idx)) << i) + } + assert(!lineInScratchpad(Cat(v, refill_idx))) + v + } + + /** Tag SRAM, indexed with virtual memory, + * content with `refillError ## tag[19:0]` after ECC + */ + val tag_array = DescribedSRAM( + name = "tag_array", + desc = "ICache Tag Array", + size = nSets, + data = Vec(nWays, UInt(tECC.width(1 + tagBits).W)) + ) + val tag_rdata = tag_array.read(s0_vaddr(untagBits - 1, blockOffBits), !refill_done && s0_valid) + + /** register indicates the ongoing GetAckData transaction is corrupted. */ + val accruedRefillError = Reg(Bool()) + + /** wire indicates the ongoing GetAckData transaction is corrupted. 
*/ + val refillError = tl_out.d.bits.corrupt || (refill_cnt > 0.U && accruedRefillError) + when(refill_done) { + // For AccessAckData, denied => corrupt + /** data written to [[tag_array]]. + * ECC encoded `refillError ## refill_tag` + */ + val enc_tag = tECC.encode(Cat(refillError, refill_tag)) + tag_array.write(refill_idx, VecInit(Seq.fill(nWays) { enc_tag }), Seq.tabulate(nWays)(repl_way === _.U)) + + ccover(refillError, "D_CORRUPT", "I$ D-channel corrupt") + } + // notify CPU, I$ has corrupt. + io.errors.bus.valid := tl_out.d.fire && (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + io.errors.bus.bits := (refill_paddr >> blockOffBits) << blockOffBits + + /** true indicate this cacheline is valid, + * indexed by (wayIndex ## setIndex) + * after refill_done and not FENCE.I, (repl_way ## refill_idx) set to true. + */ + val vb_array = RegInit(0.U((nSets * nWays).W)) + when(refill_one_beat) { + accruedRefillError := refillError + // clear bit when refill starts so hit-under-miss doesn't fetch bad data + vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated) + } + + /** flush cache when invalidate is true. */ + val invalidate = WireDefault(io.invalidate) + when(invalidate) { + vb_array := 0.U + invalidated := true.B + } + + /** wire indicates that tag is correctable or uncorrectable. + * will trigger CPU to replay and I$ invalidating, if correctable. + */ + val s1_tag_disparity = Wire(Vec(nWays, Bool())) + + /** wire indicates that bus has an uncorrectable error. + * respond to CPU [[io.resp.bits.ae]], cause [[Causes.fetch_access]]. + */ + val s1_tl_error = Wire(Vec(nWays, Bool())) + + /** how many bits will be fetched by CPU for each fetch. */ + val wordBits = outer.icacheParams.fetchBytes * 8 + + /** a set of raw data read from [[data_arrays]]. */ + val s1_dout = Wire(Vec(nWays, UInt(dECC.width(wordBits).W))) + s1_dout := DontCare + + /** address accessed by [[tl_in]] for ITIM. 
*/ + val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U) + + /** address used at stage 1 and 3. + * {{{ + * In stage 1, it caches TileLink data, store in stage 2 if ECC passed. + * In stage 3, it caches corrected data from stage 2, and store in stage 4.}}} + */ + val s1s3_slaveAddr = Reg(UInt(log2Ceil(outer.size).W)) + + /** data used at stage 1 and 3. + * {{{ + * In stage 1, it caches TileLink data, store in stage 2. + * In stage 3, it caches corrected data from data ram, and return to d channel.}}} + */ + val s1s3_slaveData = Reg(UInt(wordBits.W)) + + for (i <- 0 until nWays) { + val s1_idx = index(s1_vaddr, io.s1_paddr) + val s1_tag = io.s1_paddr >> pgUntagBits + + /** this way is used by scratchpad. + * [[tag_array]] corrupted. + */ + val scratchpadHit = scratchpadWayValid(i.U) && + Mux( + s1_slaveValid, + // scratchpad accessing form [[tl_in]]. + // @todo I think XBar will guarantee there won't be an illegal access on the bus? + // so why did have this check `lineInScratchpad(scratchpadLine(s1s3_slaveAddr))`? + // I think it will always be true. + lineInScratchpad(scratchpadLine(s1s3_slaveAddr)) && scratchpadWay(s1s3_slaveAddr) === i.U, + // scratchpad accessing from [[io]]. + // @todo Accessing ITIM correspond address will be able to read cacheline? + // is this desired behavior? + addrInScratchpad(io.s1_paddr) && scratchpadWay(io.s1_paddr) === i.U + ) + val s1_vb = vb_array(Cat(i.U, s1_idx)) && !s1_slaveValid + val enc_tag = tECC.decode(tag_rdata(i)) + + /** [[tl_error]] ECC error bit. + * [[tag]] of [[tag_array]] access. + */ + val (tl_error, tag) = Split(enc_tag.uncorrected, tagBits) + val tagMatch = s1_vb && tag === s1_tag + + /** tag error happens. */ + s1_tag_disparity(i) := s1_vb && enc_tag.error + + /** if tag matched but ecc checking failed, this access will trigger [[Causes.fetch_access]] exception. 
*/ + s1_tl_error(i) := tagMatch && tl_error.asBool + s1_tag_hit(i) := tagMatch || scratchpadHit + } + assert( + !(s1_valid || s1_slaveValid) || PopCount(s1_tag_hit.zip(s1_tag_disparity).map { case (h, d) => h && !d }) <= 1.U + ) + + require(tl_out.d.bits.data.getWidth % wordBits == 0) + + /** Data SRAM + * + * banked with TileLink beat bytes / CPU fetch bytes, + * indexed with [[index]] and multi-beats cycle, + * content with `eccError ## wordBits` after ECC. + * {{{ + * │ │xx│xxxxxx│xxx│x│xx│ + * ↑word + * ↑bank + * ↑way + * └─set──┴─offset─┘ + * └────row───┘ + * }}} + * Note: + * Data SRAM is indexed with virtual memory(vaddr[11:2]), + * - vaddr[11:3]->row, + * - vaddr[2]->bank=i + * - Cache line size = refillCycels(8) * bank(2) * datasize(4 bytes) = 64 bytes + * - data width = 32 + * + * read: + * read happens in stage 0 + * + * write: + * It takes 8 beats to refill 16 instruction in each refilling cycle. + * Data_array receives data[63:0](2 instructions) at once,they will be allocated in deferent bank according to vaddr[2] + */ + val data_arrays = Seq.tabulate(tl_out.d.bits.data.getWidth / wordBits) { i => + DescribedSRAM( + name = s"data_arrays_${i}", + desc = "ICache Data Array", + size = nSets * refillCycles, + data = Vec(nWays, UInt(dECC.width(wordBits).W)) + ) + } + + for ((data_array, i) <- data_arrays.zipWithIndex) { + + /** bank match (vaddr[2]) */ + def wordMatch(addr: UInt) = + addr.extract(log2Ceil(tl_out.d.bits.data.getWidth / 8) - 1, log2Ceil(wordBits / 8)) === i.U + def row(addr: UInt) = addr(untagBits - 1, blockOffBits - log2Ceil(refillCycles)) + + /** read_enable signal */ + val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr)) + + /** write_enable signal + * refill from [[tl_out]] or ITIM write. + */ + val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr)) + + /** index to access [[data_array]]. */ + val mem_idx = + // I$ refill. 
refill_idx[2:0] is the beats + Mux( + refill_one_beat, + (refill_idx << log2Ceil(refillCycles)) | refill_cnt, + // ITIM write. + Mux( + s3_slaveValid, + row(s1s3_slaveAddr), + // ITIM read. + Mux( + s0_slaveValid, + row(s0_slaveAddr), + // CPU read. + row(s0_vaddr) + ) + ) + ) + when(wen) { + //wr_data + val data = Mux(s3_slaveValid, s1s3_slaveData, tl_out.d.bits.data(wordBits * (i + 1) - 1, wordBits * i)) + //the way to be replaced/written + val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way) + data_array.write(mem_idx, VecInit(Seq.fill(nWays) { dECC.encode(data) }), (0 until nWays).map(way === _.U)) + } + // write access + /** data read from [[data_array]]. */ + val dout = data_array.read(mem_idx, !wen && s0_ren) + // Mux to select a way to [[s1_dout]] + when(wordMatch(Mux(s1_slaveValid, s1s3_slaveAddr, io.s1_paddr))) { + s1_dout := dout + } + } + + /** When writing full words to ITIM, ECC errors are correctable. + * When writing a full scratchpad word, suppress the read so Xs don't leak out + */ + val s1s2_full_word_write = WireDefault(false.B) + val s1_dont_read = s1_slaveValid && s1s2_full_word_write + + /** clock gate signal for [[s2_tag_hit]], [[s2_dout]], [[s2_tag_disparity]], [[s2_tl_error]], [[s2_scratchpad_hit]]. */ + val s1_clk_en = s1_valid || s1_slaveValid + val s2_tag_hit = RegEnable(Mux(s1_dont_read, 0.U.asTypeOf(s1_tag_hit), s1_tag_hit), s1_clk_en) + + /** way index to access [[data_arrays]]. */ + val s2_hit_way = OHToUInt(s2_tag_hit) + + /** ITIM index to access [[data_arrays]]. + * replace tag with way, word set to 0. 
+ */ + val s2_scratchpad_word_addr = Cat( + s2_hit_way, + Mux(s2_slaveValid, s1s3_slaveAddr, io.s2_vaddr)(untagBits - 1, log2Ceil(wordBits / 8)), + 0.U(log2Ceil(wordBits / 8).W) + ) + val s2_dout = RegEnable(s1_dout, s1_clk_en) + val s2_way_mux = Mux1H(s2_tag_hit, s2_dout) + val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_clk_en).asUInt.orR + val s2_tl_error = RegEnable(s1_tl_error.asUInt.orR, s1_clk_en) + + /** ECC decode result for [[data_arrays]]. */ + val s2_data_decoded = dECC.decode(s2_way_mux) + + /** ECC error happened, correctable or uncorrectable, ask CPU to replay. */ + val s2_disparity = s2_tag_disparity || s2_data_decoded.error + + /** access hit in ITIM, if [[s1_slaveValid]], this access is from [[tl_in]], else from CPU [[io]]. */ + val s1_scratchpad_hit = + Mux(s1_slaveValid, lineInScratchpad(scratchpadLine(s1s3_slaveAddr)), addrInScratchpad(io.s1_paddr)) + + /** stage 2 of [[s1_scratchpad_hit]]. */ + val s2_scratchpad_hit = RegEnable(s1_scratchpad_hit, s1_clk_en) + + /** ITIM uncorrectable read. + * `s2_scratchpad_hit`: processing a scratchpad read(from [[tl_in]] or [[io]]) + * `s2_data_decoded.uncorrectable`: read a uncorrectable data. + * `s2_valid`: [[io]] non-canceled read. + * `(s2_slaveValid && !s2_full_word_write)`: [[tl_in]] read or write a word with wormhole. + * if write a full word, even stage 2 read uncorrectable. + * stage 3 full word write will recovery this. + */ + val s2_report_uncorrectable_error = + s2_scratchpad_hit && s2_data_decoded.uncorrectable && (s2_valid || (s2_slaveValid && !s1s2_full_word_write)) + + /** ECC uncorrectable address, send to Bus Error Unit. */ + val s2_error_addr = + scratchpadBase.map(base => Mux(s2_scratchpad_hit, base + s2_scratchpad_word_addr, 0.U)).getOrElse(0.U) + + // output signals + outer.icacheParams.latency match { + // if I$ latency is 1, no ITIM, no ECC. 
+ case 1 => + require(tECC.isInstanceOf[IdentityCode]) + require(dECC.isInstanceOf[IdentityCode]) + require(outer.icacheParams.itimAddr.isEmpty) + // reply data to CPU at stage 2. no replay. + io.resp.bits.data := Mux1H(s1_tag_hit, s1_dout) + io.resp.bits.ae := s1_tl_error.asUInt.orR + io.resp.valid := s1_valid && s1_hit + io.resp.bits.replay := false.B + + // if I$ latency is 2, can have ITIM and ECC. + case 2 => + // when some sort of memory bit error have occurred + // @todo why so aggressive to invalidate all when ecc corrupted. + when(s2_valid && s2_disparity) { invalidate := true.B } + + // reply data to CPU at stage 2. + io.resp.bits.data := s2_data_decoded.uncorrected + io.resp.bits.ae := s2_tl_error + io.resp.bits.replay := s2_disparity + io.resp.valid := s2_valid && s2_hit + + // report correctable error to BEU at stage 2. + io.errors.correctable.foreach { c => + c.valid := (s2_valid || s2_slaveValid) && s2_disparity && !s2_report_uncorrectable_error + c.bits := s2_error_addr + } + // report uncorrectable error to BEU at stage 2. + io.errors.uncorrectable.foreach { u => + u.valid := s2_report_uncorrectable_error + u.bits := s2_error_addr + } + + // ITIM access + tl_in.map { tl => + /** valid signal for D channel. */ + val respValid = RegInit(false.B) + // ITIM access is unpipelined + tl.a.ready := !(tl_out.d.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid || respValid || !io.clock_enabled) + + /** register used to latch TileLink request for one cycle. */ + val s1_a = RegEnable(tl.a.bits, s0_slaveValid) + // Write Data(Put / PutPartial all mask is 1) + s1s2_full_word_write := edge_in.get.hasData(s1_a) && s1_a.mask.andR + // (de)allocate ITIM + when(s0_slaveValid) { + val a = tl.a.bits + // address + s1s3_slaveAddr := tl.a.bits.address + // store Put/PutP data + s1s3_slaveData := tl.a.bits.data + // S0 + when(edge_in.get.hasData(a)) { + // access data in 0 -> way - 2 allocate and enable, access data in way - 1(last way), deallocate. 
+ val enable = scratchpadWayValid(scratchpadWay(a.address)) + //The address isn't in range, + when(!lineInScratchpad(scratchpadLine(a.address))) { + scratchpadMax.get := scratchpadLine(a.address) + invalidate := true.B + } + scratchpadOn := enable + + val itim_allocated = !scratchpadOn && enable + val itim_deallocated = scratchpadOn && !enable + val itim_increase = scratchpadOn && enable && scratchpadLine(a.address) > scratchpadMax.get + val refilling = refill_valid && refill_cnt > 0.U + ccover(itim_allocated, "ITIM_ALLOCATE", "ITIM allocated") + ccover(itim_allocated && refilling, "ITIM_ALLOCATE_WHILE_REFILL", "ITIM allocated while I$ refill") + ccover(itim_deallocated, "ITIM_DEALLOCATE", "ITIM deallocated") + ccover(itim_deallocated && refilling, "ITIM_DEALLOCATE_WHILE_REFILL", "ITIM deallocated while I$ refill") + ccover(itim_increase, "ITIM_SIZE_INCREASE", "ITIM size increased") + ccover(itim_increase && refilling, "ITIM_SIZE_INCREASE_WHILE_REFILL", "ITIM size increased while I$ refill") + } + } + + assert(!s2_valid || RegNext(RegNext(s0_vaddr)) === io.s2_vaddr) + when( + !(tl.a.valid || s1_slaveValid || s2_slaveValid || respValid) + && s2_valid && s2_data_decoded.error && !s2_tag_disparity + ) { + // handle correctable errors on CPU accesses to the scratchpad. + // if there is an in-flight slave-port access to the scratchpad, + // report the miss but don't correct the error (as there is + // a structural hazard on s1s3_slaveData/s1s3_slaveAddress). + s3_slaveValid := true.B + s1s3_slaveData := s2_data_decoded.corrected + s1s3_slaveAddr := s2_scratchpad_word_addr | s1s3_slaveAddr(log2Ceil(wordBits / 8) - 1, 0) + } + + // back pressure is allowed on the [[tl]] + // pull up [[respValid]] when [[s2_slaveValid]] until [[tl.d.fire]] + respValid := s2_slaveValid || (respValid && !tl.d.ready) + // if [[s2_full_word_write]] will overwrite data, and [[s2_data_decoded.uncorrectable]] can be ignored. 
+ val respError = + RegEnable(s2_scratchpad_hit && s2_data_decoded.uncorrectable && !s1s2_full_word_write, s2_slaveValid) + when(s2_slaveValid) { + // need stage 3 if Put or correct decoding. + // @todo if uncorrectable [[s2_data_decoded]]? + when(edge_in.get.hasData(s1_a) || s2_data_decoded.error) { s3_slaveValid := true.B } + + /** data not masked by the TileLink PutData/PutPartialData. + * means data is stored at [[s1s3_slaveData]] which was read at stage 1. + */ + def byteEn(i: Int) = !(edge_in.get.hasData(s1_a) && s1_a.mask(i)) + // write [[s1s3_slaveData]] based on index of wordBits. + // @todo seems a problem here? + // granularity of CPU fetch is `wordBits/8`, + // granularity of TileLink access is `TLBundleParameters.dataBits/8` + // these two granularity can be different. + // store data read from RAM + s1s3_slaveData := (0 until wordBits / 8) + .map(i => Mux(byteEn(i), s2_data_decoded.corrected, s1s3_slaveData)(8 * (i + 1) - 1, 8 * i)) + .asUInt + } + + tl.d.valid := respValid + tl.d.bits := Mux( + edge_in.get.hasData(s1_a), + // PutData/PutPartialData -> AccessAck + edge_in.get.AccessAck(s1_a), + // Get -> AccessAckData + edge_in.get.AccessAck(s1_a, 0.U, denied = false.B, corrupt = respError) + ) + tl.d.bits.data := s1s3_slaveData + // Tie off unused channels + tl.b.valid := false.B + tl.c.ready := true.B + tl.e.ready := true.B + + ccover(s0_valid && s1_slaveValid, "CONCURRENT_ITIM_ACCESS_1", "ITIM accessed, then I$ accessed next cycle") + ccover( + s0_valid && s2_slaveValid, + "CONCURRENT_ITIM_ACCESS_2", + "ITIM accessed, then I$ accessed two cycles later" + ) + ccover(tl.d.valid && !tl.d.ready, "ITIM_D_STALL", "ITIM response blocked by D-channel") + ccover(tl_out.d.valid && !tl_out.d.ready, "ITIM_BLOCK_D", "D-channel blocked by ITIM access") + } + } + + tl_out.a.valid := s2_request_refill + tl_out.a.bits := edge_out + .Get(fromSource = 0.U, toAddress = (refill_paddr >> blockOffBits) << blockOffBits, lgSize = lgCacheBlockBytes.U) + ._2 + + // prefetch 
when next-line access does not cross a page + if (cacheParams.prefetch) { + + /** [[crosses_page]] indicate if there is a crosses page access + * [[next_block]] : the address to be prefetched. + */ + val (crosses_page, next_block) = Split(refill_paddr(pgIdxBits - 1, blockOffBits) +& 1.U, pgIdxBits - blockOffBits) + + when(tl_out.a.fire) { + send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page + when(send_hint) { + send_hint := false.B + hint_outstanding := true.B + } + } + + // @todo why refill_done will kill hint at this cycle? + when(refill_done) { + send_hint := false.B + } + + // D channel reply with HintAck. + when(tl_out.d.fire && !refill_one_beat) { + hint_outstanding := false.B + } + + when(send_hint) { + tl_out.a.valid := true.B + tl_out.a.bits := edge_out + .Hint( + fromSource = 1.U, + toAddress = Cat(refill_paddr >> pgIdxBits, next_block) << blockOffBits, + lgSize = lgCacheBlockBytes.U, + param = TLHints.PREFETCH_READ + ) + ._2 + } + + ccover(send_hint && !tl_out.a.ready, "PREFETCH_A_STALL", "I$ prefetch blocked by A-channel") + ccover( + refill_valid && (tl_out.d.fire && !refill_one_beat), + "PREFETCH_D_BEFORE_MISS_D", + "I$ prefetch resolves before miss" + ) + ccover( + !refill_valid && (tl_out.d.fire && !refill_one_beat), + "PREFETCH_D_AFTER_MISS_D", + "I$ prefetch resolves after miss" + ) + ccover(tl_out.a.fire && hint_outstanding, "PREFETCH_D_AFTER_MISS_A", "I$ prefetch resolves after second miss") + } + // Drive APROT information + tl_out.a.bits.user.lift(AMBAProt).foreach { x => + // Rocket caches all fetch requests, and it's difficult to differentiate privileged/unprivileged on + // cached data, so mark as privileged + x.fetch := true.B + x.secure := true.B + x.privileged := true.B + x.bufferable := true.B + x.modifiable := true.B + x.readalloc := io.s2_cacheable + x.writealloc := io.s2_cacheable + } + tl_out.b.ready := true.B + tl_out.c.valid := false.B + tl_out.e.valid := false.B + assert(!(tl_out.a.valid && 
addrMaybeInScratchpad(tl_out.a.bits.address))) + + // if there is an outstanding refill, cannot flush I$. + when(!refill_valid) { invalidated := false.B } + when(refill_fire) { refill_valid := true.B } + when(refill_done) { refill_valid := false.B } + + io.perf.acquire := refill_fire + // don't gate I$ clock since there are outstanding transcations. + io.keep_clock_enabled := + tl_in + .map(tl => tl.a.valid || tl.d.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid) + .getOrElse(false.B) || // ITIM + s1_valid || s2_valid || refill_valid || send_hint || hint_outstanding // I$ + + /** index to access [[data_arrays]] and [[tag_array]]. + * @note + * if [[untagBits]] > [[pgIdxBits]] in + * {{{ + * ┌──idxBits──┐ + * ↓ ↓ + * │ tag │ set │offset│ + * │ pageTag │ pageIndex│ + * ↑ ↑ ↑ │ + * untagBits│ blockOffBits│ + * pgIdxBits │ + * └msb┴──lsb──┘ + * vaddr paddr + * }}} + * + * else use paddr directly. + * Note: if [[untagBits]] > [[pgIdxBits]], there will be a alias issue which isn't addressend by the icache yet. + */ + def index(vaddr: UInt, paddr: UInt) = { + + /** [[paddr]] as LSB to be used for VIPT. */ + val lsbs = paddr(pgUntagBits - 1, blockOffBits) + + /** if [[untagBits]] > [[pgIdxBits]], append [[vaddr]] to higher bits of index as [[msbs]]. 
*/ + val msbs = (idxBits + blockOffBits > pgUntagBits).option(vaddr(idxBits + blockOffBits - 1, pgUntagBits)) + msbs ## lsbs + } + + ccover(!send_hint && (tl_out.a.valid && !tl_out.a.ready), "MISS_A_STALL", "I$ miss blocked by A-channel") + ccover(invalidate && refill_valid, "FLUSH_DURING_MISS", "I$ flushed during miss") + + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + property.cover(cond, s"ICACHE_$label", "MemorySystem;;" + desc) + + val mem_active_valid = Seq(property.CoverBoolean(s2_valid, Seq("mem_active"))) + val data_error = Seq( + property.CoverBoolean(!s2_data_decoded.correctable && !s2_data_decoded.uncorrectable, Seq("no_data_error")), + property.CoverBoolean(s2_data_decoded.correctable, Seq("data_correctable_error")), + property.CoverBoolean(s2_data_decoded.uncorrectable, Seq("data_uncorrectable_error")) + ) + val request_source = Seq( + property.CoverBoolean(!s2_slaveValid, Seq("from_CPU")), + property.CoverBoolean(s2_slaveValid, Seq("from_TL")) + ) + val tag_error = Seq( + property.CoverBoolean(!s2_tag_disparity, Seq("no_tag_error")), + property.CoverBoolean(s2_tag_disparity, Seq("tag_error")) + ) + val mem_mode = Seq( + property.CoverBoolean(s2_scratchpad_hit, Seq("ITIM_mode")), + property.CoverBoolean(!s2_scratchpad_hit, Seq("cache_mode")) + ) + + val error_cross_covers = new property.CrossProperty( + Seq(mem_active_valid, data_error, tag_error, request_source, mem_mode), + Seq( + // tag error cannot occur in ITIM mode + Seq("tag_error", "ITIM_mode"), + // Can only respond to TL in ITIM mode + Seq("from_TL", "cache_mode") + ), + "MemorySystem;;Memory Bit Flip Cross Covers" + ) + + property.cover(error_cross_covers) +} From dd24242eb3307064702a7dd082eea7fc260b8bf3 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Fri, 28 Jun 2024 13:30:45 +0800 Subject: [PATCH 062/140] [rocketv] migrate ICache --- rocketv/src/Bundle.scala | 32 ++ rocketv/src/ICache.scala | 1046 ++++++++++++++++++-------------------- 2 files 
changed, 535 insertions(+), 543 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index d704771e6..ebcd8bc58 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -957,3 +957,35 @@ class L2TLBEntry(nSets: Int, ppnBits: Int, maxSVAddrBits: Int, pgIdxBits: Int, u /** whether the page is readable */ val r = Bool() } + +class ICacheReq(vaddrBits: Int) extends Bundle { + val addr = UInt(vaddrBits.W) +} + +class ICacheResp(fetchBytes: Int) extends Bundle { + + /** data to CPU. + * @todo why 4 instructions? + */ + val data = UInt((fetchBytes * 8).W) + + /** ask CPU to replay fetch when tag or data ECC error happened. */ + val replay = Bool() + + /** access exception: + * indicate CPU an tag ECC error happened. + * if [[outer.icacheParams.latency]] is 1, tie 0. + */ + val ae = Bool() + +} + +class ICacheErrors(hasCorrectable: Boolean, hasUncorrectable: Boolean, paddrBits: Int) extends Bundle { + val correctable = Option.when(hasCorrectable)(Valid(UInt(paddrBits.W))) + val uncorrectable = Option.when(hasUncorrectable)(Valid(UInt(paddrBits.W))) + val bus = Valid(UInt(paddrBits.W)) +} + +class ICachePerfEvents extends Bundle { + val acquire = Bool() +} diff --git a/rocketv/src/ICache.scala b/rocketv/src/ICache.scala index 08b5812ff..ca508f513 100644 --- a/rocketv/src/ICache.scala +++ b/rocketv/src/ICache.scala @@ -1,315 +1,249 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{isPow2, log2Ceil, log2Up, Cat, Decoupled, Mux1H, OHToUInt, PopCount, RegEnable, Valid} -import freechips.rocketchip.amba._ -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.tile._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util.{DescribedSRAM, _} -import freechips.rocketchip.util.property -import chisel3.experimental.SourceInfo -import chisel3.dontTouch +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.random.LFSR -// TODO: get rid of it. -import freechips.rocketchip.rocket.ICacheParams - -trait HasL1ICacheParameters extends HasL1CacheParameters with HasCoreParameters { - val cacheParams = tileParams.icache.get -} - -class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters { - val addr = UInt(vaddrBits.W) -} - -class ICacheErrors(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters with CanHaveErrors { - val correctable = (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) - val uncorrectable = (cacheParams.itimAddr.nonEmpty && cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) - val bus = Valid(UInt(paddrBits.W)) -} - -/** [[ICache]] is a set associated cache I$(Instruction Cache) of Rocket. 
- * {{{ - * Keywords: Set-associated - * 3 stage pipeline - * Virtually-Indexed Physically-Tagged (VIPT) - * Parallel access to tag and data SRAM - * Random replacement algorithm - * Optional Features: - * Prefetch - * ECC - * Instruction Tightly Integrated Memory(ITIM)}}} - * {{{ - * PipeLine: - * Stage 0 : access data and tag SRAM in parallel - * Stage 1 : receive paddr from CPU - * compare tag and paddr when the entry is valid - * if hit : pick up the target instruction - * if miss : start refilling in stage 2 - * Stage 2 : respond to CPU or start a refill}}} - * {{{ - * Note: Page size = 4KB thus paddr[11:0] = vaddr[11:0] - * considering sets = 64, cachelineBytes =64 - * use vaddr[11:6] to access tag_array - * use vaddr[11:2] to access data_array}}} - * {{{ - * ITIM: - * │ tag │ set │offset│ - * ├way┘ → indicate way location - * │ line │ }}} - * if `way` == b11 (last way), deallocate - * if write to ITIM all I$ will be invalidate - * - * The optional dynamic configurable ITIM sharing SRAM with I$ is set by [[icacheParams.itimAddr]]. - * if PutFullData/PutPartialData to the ITIM address, it will dynamically allocate base address to the address of this accessing from SRAM. - * if access to last way of ITIM, it set will change back to I$. - * - * If ITIM is configured: - * set: if address to access is not to be configured to ITIM yet, - * a memory accessing to ITIM address range will modify `scratchpadMax`, - * from ITIM base to `scratchpadMax` will be used as ITIM. - * unset: @todo - * - * There will always be one way(the last way) used for I$, which cannot be allocated to ITIM. - * - * @param icacheParams parameter to this I$. - */ -class ICache(val icacheParams: ICacheParams)(implicit p: Parameters) - extends LazyModule { - lazy val module = new ICacheModule(this) - - /** Diplomatic hartid bundle used for ITIM. */ - val hartIdSinkNodeOpt = icacheParams.itimAddr.map(_ => BundleBridgeSink[UInt]()) - - /** @todo base address offset for ITIM? 
*/ - val mmioAddressPrefixSinkNodeOpt = icacheParams.itimAddr.map(_ => BundleBridgeSink[UInt]()) - - /** Rocket configuration has virtual memory. - * - * This only affect [[masterNode]] AMBA ports only: - * AMBA privileged, secure will be set as true while others set as false. - * see [[freechips.rocketchip.amba.AMBAProt]] for more informations. - */ - val useVM = p(TileKey).core.useVM - - /** [[TLClientNode]] of I$. - * - * source Id range: - * 0: use [[TLEdgeOut.Get]] to get instruction. - * 1: use [[TLEdgeOut.Hint]] to hint next level memory device fetching next cache line, if configured [[icacheParams.prefetch]]. - * - * @todo why if no [[useVM]], will have AMBAProtField in requestFields? - */ - val masterNode = TLClientNode( - Seq( - TLMasterPortParameters.v1( - clients = Seq( - TLMasterParameters.v1( - sourceId = IdRange(0, 1 + icacheParams.prefetch.toInt), // 0=refill, 1=hint - name = s"ICache" - ) - ), - requestFields = useVM.option(Seq()).getOrElse(Seq(AMBAProtField())) - ) - ) +import chisel3.util._ +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} + +case class ICacheParameter(useAsyncReset: Boolean, + prefetch: Boolean, + nSets: Int, + nWays: Int, + blockBytes: Int, + usingVM: Boolean, + vaddrBits: Int, + paddrBits: Int + ) extends SerializableModuleParameter { + // static for now + val latency: Int = 2 + val itimAXIParameter: Option[AXI4BundleParameter] = None + val itimBaseAddr: Option[BigInt] = None + val tagECC: Option[String] = None + val dataECC: Option[String] = None + // calculated + // todo: param? 
+ val fetchBytes: Int = 4 + val usingITIM: Boolean = itimAXIParameter.isDefined + val tagCode: Code = Code.fromString(tagECC) + val dataCode: Code = Code.fromString(dataECC) + // (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) + val hasCorrectable: Boolean = tagCode.canDetect || dataCode.canDetect + // (cacheParams.itimAddr.nonEmpty && cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) + val hasUncorrekoctable: Boolean = itimBaseAddr.nonEmpty && dataCode.canDetect + val isDM: Boolean = nWays == 1 + // axi data with + val rowBits: Int = blockBytes * 8 + val refillCycles: Int = blockBytes * 8 / rowBits + val blockOffBits: Int = log2Up(blockBytes) + val idxBits: Int = log2Up(nSets) + val pgIdxBits: Int = 12 + val untagBits: Int = blockOffBits + idxBits + val pgUntagBits: Int = if (usingVM) untagBits min pgIdxBits else untagBits + val tagBits: Int = paddrBits - pgUntagBits + val instructionFetchParameter: AXI4BundleParameter = AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 0, + userDataWidth = 0, + userRespWidth = 0, + hasAW = false, + hasW = false, + hasB = false, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, ) - - /** size of [[ICache]], count in byte. */ - val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes - - /** last way will be configured to control offest, access it will deallocate an entire set to I$. 
*/ - val itim_control_offset = size - icacheParams.nSets * icacheParams.blockBytes - - val device = new SimpleDevice("itim", Seq("sifive,itim0")) { - override def describe(resources: ResourceBindings): Description = { - val Description(name, mapping) = super.describe(resources) - val Seq(Binding(_, ResourceAddress(address, perms))) = resources("reg/mem") - val base_address = address.head.base - val mem_part = AddressSet.misaligned(base_address, itim_control_offset) - val control_part = AddressSet.misaligned(base_address + itim_control_offset, size - itim_control_offset) - val extra = Map( - "reg-names" -> Seq(ResourceString("mem"), ResourceString("control")), - "reg" -> Seq(ResourceAddress(mem_part, perms), ResourceAddress(control_part, perms)) - ) - Description(name, mapping ++ extra) - } - } - - def itimProperty: Option[Seq[ResourceValue]] = icacheParams.itimAddr.map(_ => device.asProperty) - - /** @todo why [[wordBytes]] is defined by [[icacheParams.fetchBytes]], rather than 32 directly? */ - private val wordBytes = icacheParams.fetchBytes - - /** Instruction Tightly Integrated Memory node. */ - val slaveNode = - TLManagerNode(icacheParams.itimAddr.toSeq.map { itimAddr => - TLSlavePortParameters.v1( - Seq( - TLSlaveParameters.v1( - address = Seq(AddressSet(itimAddr, size - 1)), - resources = device.reg("mem"), - regionType = RegionType.IDEMPOTENT, - executable = true, - supportsPutFull = TransferSizes(1, wordBytes), - supportsPutPartial = TransferSizes(1, wordBytes), - supportsGet = TransferSizes(1, wordBytes), - fifoId = Some(0) - ) - ), // requests handled in FIFO order - beatBytes = wordBytes, - minLatency = 1 - ) - }) -} - -class ICacheResp(outer: ICache) extends Bundle { - - /** data to CPU. */ - val data = UInt((outer.icacheParams.fetchBytes * 8).W) - - /** ask CPU to replay fetch when tag or data ECC error happened. */ - val replay = Bool() - - /** access exception: - * indicate CPU an tag ECC error happened. 
- * if [[outer.icacheParams.latency]] is 1, tie 0. - */ - val ae = Bool() - } -class ICachePerfEvents extends Bundle { - val acquire = Bool() +object ICacheParameter { + implicit def rwP: upickle.default.ReadWriter[ICacheParameter] = upickle.default.macroRW[ICacheParameter] } -/** IO from CPU to ICache. */ -class ICacheBundle(val outer: ICache) extends CoreBundle()(outer.p) { - +class ICacheInterface(parameter: ICacheParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) /** first cycle requested from CPU. */ - val req = Flipped(Decoupled(new ICacheReq)) - val s1_paddr = Input(UInt(paddrBits.W)) // delayed one cycle w.r.t. req - val s2_vaddr = Input(UInt(vaddrBits.W)) // delayed two cycles w.r.t. req - val s1_kill = Input(Bool()) // delayed one cycle w.r.t. req + val req = Flipped(Decoupled(new ICacheReq(parameter.vaddrBits))) + /** from TLB. */ + val s1_paddr = Input(UInt(parameter.paddrBits.W)) + /** from frontend, pipe from s0. */ + val s2_vaddr = Input(UInt(parameter.vaddrBits.W)) + /** - instruction jmp away(at S2). + * - if TLB not valid, kill it. + * - S2 replay + */ + val s1_kill = Input(Bool()) + /** @todo s2_kill only kill refill? + * - S2 speculative access(refill?) cannot access non-cacheable address? why? + * - S2 exception (PF, AF) + */ val s2_kill = Input(Bool()) // delayed two cycles; prevents I$ miss emission + /** should L2 cache line on a miss? */ val s2_cacheable = Input(Bool()) // should L2 cache line on a miss? + /** should I$ prefetch next line on a miss? */ val s2_prefetch = Input(Bool()) // should I$ prefetch next line on a miss? /** response to CPU. */ - val resp = Valid(new ICacheResp(outer)) + val resp = Valid(new ICacheResp(parameter.fetchBytes)) /** flush L1 cache from CPU. - * TODO: IIRC, SFENCE.I - */ + * TODO: IIRC, SFENCE.I + */ val invalidate = Input(Bool()) - /** I$ has error, notify to bus. - * TODO: send to BPU. 
- */ - val errors = new ICacheErrors + /** I$ has error, notify to bus. */ + val errors = new ICacheErrors(parameter.hasCorrectable, parameter.hasUncorrekoctable, parameter.paddrBits) /** for performance counting. */ - val perf = Output(new ICachePerfEvents()) + val perf = Output(new ICachePerfEvents) /** enable clock. */ val clock_enabled = Input(Bool()) /** I$ miss or ITIM access will still enable clock even [[ICache]] is asked to be gated. */ val keep_clock_enabled = Output(Bool()) -} -class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheParameters { - override val cacheParams = outer.icacheParams // Use the local parameters + val instructionFetchAXI: AXI4ROIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) - /** IO between Core and ICache. */ - val io = IO(new ICacheBundle(outer)) + val itimAXI: Option[AXI4RWIrrevocable] = parameter.itimAXIParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) +} - /** TileLink port to memory. */ - val (tl_out, edge_out) = outer.masterNode.out(0) +@instantiable +class ICache(val parameter: ICacheParameter) + extends FixedIORawModule(new ICacheInterface(parameter)) + with SerializableModule[ICacheParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // compatiblity mode + object Split { + def apply(x: UInt, n0: Int) = { + val w = x.getWidth + (x(w-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n1), x(n1-1,n0), x(n0-1,0)) + } + def apply(x: UInt, n2: Int, n1: Int, n0: Int) = { + val w = x.getWidth + (x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0)) + } + } - /** TileLink port as ITIM memory. - * if [[outer.slaveNode]] is not connected [[outer.slaveNode.in]] will be empty. 
- * - * wes: Option.unzip does not exist :-( - */ - val (tl_in, edge_in) = outer.slaveNode.in.headOption.unzip + val usingVM = parameter.usingVM + val refillCycles = parameter.refillCycles + val pgIdxBits = parameter.pgIdxBits + val untagBits = parameter.untagBits + val nWays = parameter.nWays + val nSets = parameter.nSets + val blockOffBits = parameter.blockOffBits + val idxBits = parameter.idxBits + val pgUntagBits = parameter.pgUntagBits + val tagBits = parameter.tagBits + val isDM = parameter.isDM + object outer { + val size = parameter.nSets * parameter.nWays * parameter.blockBytes + object icacheParams { + val fetchBytes = parameter.fetchBytes + val latency = parameter.latency + } + } + object cacheParams { + val prefetch = parameter.prefetch + } + // end - val tECC = cacheParams.tagCode - val dECC = cacheParams.dataCode + // TODO: move ecc + val tECC: Code = parameter.tagCode + val dECC: Code = parameter.dataCode - require(isPow2(nSets) && isPow2(nWays)) + require(isPow2(parameter.nSets) && isPow2(parameter.nWays)) require( - !usingVM || outer.icacheParams.itimAddr.isEmpty || pgIdxBits >= untagBits, + !usingVM || parameter.usingITIM || pgIdxBits >= untagBits, s"When VM and ITIM are enabled, I$$ set size must not exceed ${1 << (pgIdxBits - 10)} KiB; got ${(outer.size / nWays) >> 10} KiB" ) - /** if this ICache can be used as ITIM, which hart it belongs to. */ - val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle) - - /** @todo tile Memory mapping I/O base address? */ - val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle) - /** register indicates wheather ITIM is enabled. */ val scratchpadOn = RegInit(false.B) /** a cut point to SRAM, indicates which SRAM will be used as SRAM or Cache. */ - val scratchpadMax = tl_in.map(tl => Reg(UInt(log2Ceil(nSets * (nWays - 1)).W))) + val scratchpadMax = Option.when(parameter.usingITIM)(Reg(UInt(log2Ceil(nSets * (nWays - 1)).W))) /** Check if a line is in the scratchpad. 
- * - * line is a minimal granularity accessing to SRAM, calculated by [[scratchpadLine]] - */ + * + * line is a minimal granularity accessing to SRAM, calculated by [[scratchpadLine]] + */ def lineInScratchpad(line: UInt) = scratchpadMax.map(scratchpadOn && line <= _).getOrElse(false.B) /** scratchpad base address, if exist [[ICacheParams.itimAddr]], add [[ReplicatedRegion]] to base. - * @todo seem [[io_hartid]] is not connected? - * maybe when implementing itim, LookupByHartId should be changed to [[]]? - */ - val scratchpadBase = outer.icacheParams.itimAddr.map { dummy => - p(LookupByHartId)(_.icache.flatMap(_.itimAddr.map(_.U)), io_hartid.get) | io_mmio_address_prefix.get - } + * @todo seem [[io_hartid]] is not connected? + * maybe when implementing itim, LookupByHartId should be changed to [[]]? + * should be a Int + */ + val scratchpadBase: Option[UInt] = None /** check an address in the scratchpad address range. */ def addrMaybeInScratchpad(addr: UInt) = scratchpadBase.map(base => addr >= base && addr < base + outer.size.U).getOrElse(false.B) /** check property this address(paddr) exists in scratchpad. - * @todo seems duplicated in `addrMaybeInScratchpad(addr)` between `lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))`? - */ + * @todo seems duplicated in `addrMaybeInScratchpad(addr)` between `lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))`? + */ def addrInScratchpad(addr: UInt) = addrMaybeInScratchpad(addr) && lineInScratchpad(addr(untagBits + log2Ceil(nWays) - 1, blockOffBits)) /** return the way which will be used as scratchpad for accessing address - * {{{ - * │ tag │ set │offset│ - * └way┘ - * }}} - * @param addr address to be found. - */ - def scratchpadWay(addr: UInt) = addr.extract(untagBits + log2Ceil(nWays) - 1, untagBits) + * {{{ + * │ tag │ set │offset│ + * └way┘ + * }}} + * @param addr address to be found. 
+ */ + def scratchpadWay(addr: UInt) = addr(untagBits + log2Ceil(nWays) - 1, untagBits) /** check if the selected way is legal. - * note: the last way should be reserved to ICache. - */ + * note: the last way should be reserved to ICache. + */ def scratchpadWayValid(way: UInt) = way < (nWays - 1).U /** return the cacheline which will be used as scratchpad for accessing address - * {{{ - * │ tag │ set │offset│ - * ├way┘ → indicate way location - * │ line │ - * }}} - * @param addr address to be found. - * applied to slave_addr - */ + * {{{ + * │ tag │ set │offset│ + * ├way┘ → indicate way location + * │ line │ + * }}} + * @param addr address to be found. + * applied to slave_addr + */ def scratchpadLine(addr: UInt) = addr(untagBits + log2Ceil(nWays) - 1, blockOffBits) /** scratchpad access valid in stage N */ - val s0_slaveValid = tl_in.map(_.a.fire).getOrElse(false.B) + val s0_slaveValid = io.itimAXI.map(axi => axi.w.fire || axi.ar.fire).getOrElse(false.B) + val s0_slaveWriteValid = io.itimAXI.map(axi => axi.w.fire).getOrElse(false.B) + val s1_slaveValid = RegNext(s0_slaveValid, false.B) + val s1_slaveWriteValid = RegNext(s0_slaveWriteValid, false.B) val s2_slaveValid = RegNext(s1_slaveValid, false.B) + val s2_slaveWriteValid = RegNext(s1_slaveWriteValid, false.B) val s3_slaveValid = RegNext(false.B) /** valid signal for CPU accessing cache in stage 0. */ @@ -328,20 +262,20 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val s1_tag_hit = Wire(Vec(nWays, Bool())) /** CPU I$ Hit in stage 1. - * - * @note - * for logic in `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`, - * there are two different types based on latency: - * - * if latency is 1: `s1_slaveValid === false.B` and `addrMaybeInScratchpad(io.s1_paddr) === false.B` , - * since in this case, ITIM must be empty. - * - * if latency is 2: if `s1_slaveValid` is true, this SRAM accessing is coming from [[tl_in]], so it will hit. 
- * if `s1_slaveValid` is false, but CPU is accessing memory range in scratchpad address, it will hit by default. - * Hardware won't guarantee this access will access to a data which have been written in ITIM. - * - * @todo seem CPU access are both processed by `s1_tag_hit` and `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`? - */ + * + * @note + * for logic in `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`, + * there are two different types based on latency: + * + * if latency is 1: `s1_slaveValid === false.B` and `addrMaybeInScratchpad(io.s1_paddr) === false.B` , + * since in this case, ITIM must be empty. + * + * if latency is 2: if `s1_slaveValid` is true, this SRAM accessing is coming from [[tl_in]], so it will hit. + * if `s1_slaveValid` is false, but CPU is accessing memory range in scratchpad address, it will hit by default. + * Hardware won't guarantee this access will access to a data which have been written in ITIM. + * + * @todo seem CPU access are both processed by `s1_tag_hit` and `Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))`? + */ val s1_hit = s1_tag_hit.reduce(_ || _) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr)) dontTouch(s1_hit) val s2_valid = RegNext(s1_valid && !io.s1_kill, false.B) @@ -352,12 +286,13 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val refill_valid = RegInit(false.B) /** register to indicate [[tl_out]] is performing a hint. - * prefetch only happens after refilling - */ + * prefetch only happens after refilling + */ val send_hint = RegInit(false.B) /** indicate [[tl_out]] is performing a refill. */ - val refill_fire = tl_out.a.fire && !send_hint + // val refill_fire = tl_out.a.fire && !send_hint + val refill_fire = io.instructionFetchAXI.ar.fire && !send_hint /** register to indicate there is a outstanding hint. 
*/ val hint_outstanding = RegInit(false.B) @@ -369,9 +304,9 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val s1_can_request_refill = !(s2_miss || refill_valid) /** real refill signal, stage 2 miss, and was permit to refill in stage 1. - * Since a miss will trigger burst. - * miss under miss won't trigger another burst. - */ + * Since a miss will trigger burst. + * miss under miss won't trigger another burst. + */ val s2_request_refill = s2_miss && RegNext(s1_can_request_refill) val refill_paddr = RegEnable(io.s1_paddr, s1_valid && s1_can_request_refill) val refill_vaddr = RegEnable(s1_vaddr, s1_valid && s1_can_request_refill) @@ -379,20 +314,25 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val refill_idx = index(refill_vaddr, refill_paddr) /** AccessAckData, is refilling I$, it will block request from CPU. */ - val refill_one_beat = tl_out.d.fire && edge_out.hasData(tl_out.d.bits) + // val refill_one_beat = tl_out.d.fire && edge_out.hasData(tl_out.d.bits) + // TODO: check hasData? + val refill_one_beat = io.instructionFetchAXI.r.fire /** block request from CPU when refill or scratch pad access. */ io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid) s1_valid := s0_valid - val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d) + // val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d) + val d_done: Bool = io.instructionFetchAXI.r.valid && io.instructionFetchAXI.r.bits.last + // todo: burst index always == 0? + val refill_cnt: UInt = 0.U /** at last beat of `tl_out.d.fire`, finish refill. */ val refill_done = refill_one_beat && d_done /** scratchpad is writing data. block refill. 
*/ - tl_out.d.ready := !s3_slaveValid - require(edge_out.manager.minLatency > 0) + io.instructionFetchAXI.r.ready := !s3_slaveValid + // require(edge_out.manager.minLatency > 0) /** way to be replaced, implemented with a hardcoded random replacement algorithm */ val repl_way = @@ -410,39 +350,44 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP } /** Tag SRAM, indexed with virtual memory, - * content with `refillError ## tag[19:0]` after ECC - */ - val tag_array = DescribedSRAM( - name = "tag_array", - desc = "ICache Tag Array", - size = nSets, - data = Vec(nWays, UInt(tECC.width(1 + tagBits).W)) + * content with `refillError ## tag[19:0]` after ECC + */ + val tag_array: SRAMInterface[Vec[UInt]] = SRAM.masked( + size = parameter.nSets, + tpe = Vec(nWays, UInt(tECC.width(1 + tagBits).W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 ) - val tag_rdata = tag_array.read(s0_vaddr(untagBits - 1, blockOffBits), !refill_done && s0_valid) + + // val tag_rdata = tag_array.read(s0_vaddr(untagBits - 1, blockOffBits), !refill_done && s0_valid) + // todo: read req + val tag_rdata: Vec[UInt] = tag_array.readwritePorts.head.readData /** register indicates the ongoing GetAckData transaction is corrupted. */ val accruedRefillError = Reg(Bool()) /** wire indicates the ongoing GetAckData transaction is corrupted. */ - val refillError = tl_out.d.bits.corrupt || (refill_cnt > 0.U && accruedRefillError) - when(refill_done) { - // For AccessAckData, denied => corrupt - /** data written to [[tag_array]]. 
- * ECC encoded `refillError ## refill_tag` - */ - val enc_tag = tECC.encode(Cat(refillError, refill_tag)) - tag_array.write(refill_idx, VecInit(Seq.fill(nWays) { enc_tag }), Seq.tabulate(nWays)(repl_way === _.U)) - - ccover(refillError, "D_CORRUPT", "I$ D-channel corrupt") + // todo: tl_out.d.bits.corrupt -> false.B + val refillError: Bool = false.B || (refill_cnt > 0.U && accruedRefillError) + val enc_tag = tECC.encode(Cat(refillError, refill_tag)) + tag_array.readwritePorts.foreach {ramPort => + ramPort.enable := s0_valid || refill_done + ramPort.isWrite := refill_done + ramPort.address := Mux(refill_done, refill_idx, s0_vaddr(untagBits - 1, blockOffBits)) + ramPort.writeData := VecInit(Seq.fill(nWays) { enc_tag }) + ramPort.mask.foreach(_ := VecInit(Seq.tabulate(nWays)(repl_way === _.U))) } + // ccover(refillError, "D_CORRUPT", "I$ D-channel corrupt") // notify CPU, I$ has corrupt. - io.errors.bus.valid := tl_out.d.fire && (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + // flase.B -> (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + io.errors.bus.valid := io.instructionFetchAXI.r.fire && false.B io.errors.bus.bits := (refill_paddr >> blockOffBits) << blockOffBits /** true indicate this cacheline is valid, - * indexed by (wayIndex ## setIndex) - * after refill_done and not FENCE.I, (repl_way ## refill_idx) set to true. - */ + * indexed by (wayIndex ## setIndex) + * after refill_done and not FENCE.I, (repl_way ## refill_idx) set to true. + */ val vb_array = RegInit(0.U((nSets * nWays).W)) when(refill_one_beat) { accruedRefillError := refillError @@ -458,13 +403,13 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP } /** wire indicates that tag is correctable or uncorrectable. - * will trigger CPU to replay and I$ invalidating, if correctable. - */ + * will trigger CPU to replay and I$ invalidating, if correctable. + */ val s1_tag_disparity = Wire(Vec(nWays, Bool())) /** wire indicates that bus has an uncorrectable error. 
- * respond to CPU [[io.resp.bits.ae]], cause [[Causes.fetch_access]]. - */ + * respond to CPU [[io.resp.bits.ae]], cause [[Causes.fetch_access]]. + */ val s1_tl_error = Wire(Vec(nWays, Bool())) /** how many bits will be fetched by CPU for each fetch. */ @@ -475,20 +420,21 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP s1_dout := DontCare /** address accessed by [[tl_in]] for ITIM. */ - val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U) + // val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U) + val s0_slaveAddr = io.itimAXI.map(_.aw.bits.addr).getOrElse(0.U) /** address used at stage 1 and 3. - * {{{ - * In stage 1, it caches TileLink data, store in stage 2 if ECC passed. - * In stage 3, it caches corrected data from stage 2, and store in stage 4.}}} - */ + * {{{ + * In stage 1, it caches TileLink data, store in stage 2 if ECC passed. + * In stage 3, it caches corrected data from stage 2, and store in stage 4.}}} + */ val s1s3_slaveAddr = Reg(UInt(log2Ceil(outer.size).W)) /** data used at stage 1 and 3. - * {{{ - * In stage 1, it caches TileLink data, store in stage 2. - * In stage 3, it caches corrected data from data ram, and return to d channel.}}} - */ + * {{{ + * In stage 1, it caches TileLink data, store in stage 2. + * In stage 3, it caches corrected data from data ram, and return to d channel.}}} + */ val s1s3_slaveData = Reg(UInt(wordBits.W)) for (i <- 0 until nWays) { @@ -496,8 +442,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val s1_tag = io.s1_paddr >> pgUntagBits /** this way is used by scratchpad. - * [[tag_array]] corrupted. - */ + * [[tag_array]] corrupted. + */ val scratchpadHit = scratchpadWayValid(i.U) && Mux( s1_slaveValid, @@ -515,8 +461,9 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val enc_tag = tECC.decode(tag_rdata(i)) /** [[tl_error]] ECC error bit. - * [[tag]] of [[tag_array]] access. 
- */ + * [[tag]] of [[tag_array]] access. + */ + val (tl_error, tag) = Split(enc_tag.uncorrected, tagBits) val tagMatch = s1_vb && tag === s1_tag @@ -531,57 +478,58 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP !(s1_valid || s1_slaveValid) || PopCount(s1_tag_hit.zip(s1_tag_disparity).map { case (h, d) => h && !d }) <= 1.U ) - require(tl_out.d.bits.data.getWidth % wordBits == 0) + require(io.instructionFetchAXI.r.bits.data.getWidth % wordBits == 0) /** Data SRAM - * - * banked with TileLink beat bytes / CPU fetch bytes, - * indexed with [[index]] and multi-beats cycle, - * content with `eccError ## wordBits` after ECC. - * {{{ - * │ │xx│xxxxxx│xxx│x│xx│ - * ↑word - * ↑bank - * ↑way - * └─set──┴─offset─┘ - * └────row───┘ - * }}} - * Note: - * Data SRAM is indexed with virtual memory(vaddr[11:2]), - * - vaddr[11:3]->row, - * - vaddr[2]->bank=i - * - Cache line size = refillCycels(8) * bank(2) * datasize(4 bytes) = 64 bytes - * - data width = 32 - * - * read: - * read happens in stage 0 - * - * write: - * It takes 8 beats to refill 16 instruction in each refilling cycle. - * Data_array receives data[63:0](2 instructions) at once,they will be allocated in deferent bank according to vaddr[2] - */ - val data_arrays = Seq.tabulate(tl_out.d.bits.data.getWidth / wordBits) { i => - DescribedSRAM( - name = s"data_arrays_${i}", - desc = "ICache Data Array", + * + * banked with TileLink beat bytes / CPU fetch bytes, + * indexed with [[index]] and multi-beats cycle, + * content with `eccError ## wordBits` after ECC. 
+ * {{{ + * │ │xx│xxxxxx│xxx│x│xx│ + * ↑word + * ↑bank + * ↑way + * └─set──┴─offset─┘ + * └────row───┘ + * }}} + * Note: + * Data SRAM is indexed with virtual memory(vaddr[11:2]), + * - vaddr[11:3]->row, + * - vaddr[2]->bank=i + * - Cache line size = refillCycels(8) * bank(2) * datasize(4 bytes) = 64 bytes + * - data width = 32 + * + * read: + * read happens in stage 0 + * + * write: + * It takes 8 beats to refill 16 instruction in each refilling cycle. + * Data_array receives data[63:0](2 instructions) at once,they will be allocated in deferent bank according to vaddr[2] + */ + val data_arrays: Seq[SRAMInterface[Vec[UInt]]] = Seq.tabulate(io.instructionFetchAXI.r.bits.data.getWidth / wordBits) { i => + SRAM.masked( size = nSets * refillCycles, - data = Vec(nWays, UInt(dECC.width(wordBits).W)) + tpe = Vec(nWays, UInt(dECC.width(wordBits).W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 ) } for ((data_array, i) <- data_arrays.zipWithIndex) { /** bank match (vaddr[2]) */ - def wordMatch(addr: UInt) = - addr.extract(log2Ceil(tl_out.d.bits.data.getWidth / 8) - 1, log2Ceil(wordBits / 8)) === i.U + def wordMatch(addr: UInt): Bool = addr(log2Ceil(io.instructionFetchAXI.r.bits.data.getWidth / 8) - 1, log2Ceil(wordBits / 8)) === i.U + // TODO: if we have last? do we need refillCycles? def row(addr: UInt) = addr(untagBits - 1, blockOffBits - log2Ceil(refillCycles)) /** read_enable signal */ val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr)) /** write_enable signal - * refill from [[tl_out]] or ITIM write. - */ + * refill from [[tl_out]] or ITIM write. + */ val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr)) /** index to access [[data_array]]. 
*/ @@ -603,16 +551,20 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP ) ) ) - when(wen) { - //wr_data - val data = Mux(s3_slaveValid, s1s3_slaveData, tl_out.d.bits.data(wordBits * (i + 1) - 1, wordBits * i)) - //the way to be replaced/written - val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way) - data_array.write(mem_idx, VecInit(Seq.fill(nWays) { dECC.encode(data) }), (0 until nWays).map(way === _.U)) + val data: UInt = Mux(s3_slaveValid, s1s3_slaveData, io.instructionFetchAXI.r.bits.data(wordBits * (i + 1) - 1, wordBits * i)) + //the way to be replaced/written + val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way) + data_array.readwritePorts.foreach { dataPort => + dataPort.enable := wen || s0_ren + dataPort.isWrite := wen + dataPort.address := mem_idx + dataPort.writeData := VecInit(Seq.fill(nWays) { dECC.encode(data) }) + dataPort.mask.foreach(_ := VecInit((0 until nWays).map(way === _.U))) } + // write access /** data read from [[data_array]]. */ - val dout = data_array.read(mem_idx, !wen && s0_ren) + val dout: Vec[UInt] = data_array.readwritePorts.head.readData // Mux to select a way to [[s1_dout]] when(wordMatch(Mux(s1_slaveValid, s1s3_slaveAddr, io.s1_paddr))) { s1_dout := dout @@ -620,8 +572,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP } /** When writing full words to ITIM, ECC errors are correctable. - * When writing a full scratchpad word, suppress the read so Xs don't leak out - */ + * When writing a full scratchpad word, suppress the read so Xs don't leak out + */ val s1s2_full_word_write = WireDefault(false.B) val s1_dont_read = s1_slaveValid && s1s2_full_word_write @@ -633,8 +585,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val s2_hit_way = OHToUInt(s2_tag_hit) /** ITIM index to access [[data_arrays]]. - * replace tag with way, word set to 0. - */ + * replace tag with way, word set to 0. 
+ */ val s2_scratchpad_word_addr = Cat( s2_hit_way, Mux(s2_slaveValid, s1s3_slaveAddr, io.s2_vaddr)(untagBits - 1, log2Ceil(wordBits / 8)), @@ -659,13 +611,13 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP val s2_scratchpad_hit = RegEnable(s1_scratchpad_hit, s1_clk_en) /** ITIM uncorrectable read. - * `s2_scratchpad_hit`: processing a scratchpad read(from [[tl_in]] or [[io]]) - * `s2_data_decoded.uncorrectable`: read a uncorrectable data. - * `s2_valid`: [[io]] non-canceled read. - * `(s2_slaveValid && !s2_full_word_write)`: [[tl_in]] read or write a word with wormhole. - * if write a full word, even stage 2 read uncorrectable. - * stage 3 full word write will recovery this. - */ + * `s2_scratchpad_hit`: processing a scratchpad read(from [[tl_in]] or [[io]]) + * `s2_data_decoded.uncorrectable`: read a uncorrectable data. + * `s2_valid`: [[io]] non-canceled read. + * `(s2_slaveValid && !s2_full_word_write)`: [[tl_in]] read or write a word with wormhole. + * if write a full word, even stage 2 read uncorrectable. + * stage 3 full word write will recovery this. + */ val s2_report_uncorrectable_error = s2_scratchpad_hit && s2_data_decoded.uncorrectable && (s2_valid || (s2_slaveValid && !s1s2_full_word_write)) @@ -679,7 +631,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP case 1 => require(tECC.isInstanceOf[IdentityCode]) require(dECC.isInstanceOf[IdentityCode]) - require(outer.icacheParams.itimAddr.isEmpty) + require(parameter.itimAXIParameter.isEmpty) // reply data to CPU at stage 2. no replay. io.resp.bits.data := Mux1H(s1_tag_hit, s1_dout) io.resp.bits.ae := s1_tl_error.asUInt.orR @@ -710,50 +662,47 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP } // ITIM access - tl_in.map { tl => + io.itimAXI.foreach { axi => /** valid signal for D channel. 
*/ val respValid = RegInit(false.B) // ITIM access is unpipelined - tl.a.ready := !(tl_out.d.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid || respValid || !io.clock_enabled) - + axi.ar.ready := !(io.instructionFetchAXI.r.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid || respValid || !io.clock_enabled) /** register used to latch TileLink request for one cycle. */ - val s1_a = RegEnable(tl.a.bits, s0_slaveValid) + val s1_a = RegEnable(axi.ar.bits, s0_slaveValid) + val s1_aw = RegEnable(axi.aw.bits, axi.aw.fire) + val s1_w = RegEnable(axi.w.bits, axi.w.fire) // Write Data(Put / PutPartial all mask is 1) - s1s2_full_word_write := edge_in.get.hasData(s1_a) && s1_a.mask.andR + s1s2_full_word_write := axi.w.bits.strb.andR // (de)allocate ITIM - when(s0_slaveValid) { - val a = tl.a.bits + when(axi.w.fire) { // address - s1s3_slaveAddr := tl.a.bits.address + s1s3_slaveAddr := s1_aw.addr // store Put/PutP data - s1s3_slaveData := tl.a.bits.data + s1s3_slaveData := axi.w.bits.data // S0 - when(edge_in.get.hasData(a)) { - // access data in 0 -> way - 2 allocate and enable, access data in way - 1(last way), deallocate. 
- val enable = scratchpadWayValid(scratchpadWay(a.address)) - //The address isn't in range, - when(!lineInScratchpad(scratchpadLine(a.address))) { - scratchpadMax.get := scratchpadLine(a.address) - invalidate := true.B - } - scratchpadOn := enable - - val itim_allocated = !scratchpadOn && enable - val itim_deallocated = scratchpadOn && !enable - val itim_increase = scratchpadOn && enable && scratchpadLine(a.address) > scratchpadMax.get - val refilling = refill_valid && refill_cnt > 0.U - ccover(itim_allocated, "ITIM_ALLOCATE", "ITIM allocated") - ccover(itim_allocated && refilling, "ITIM_ALLOCATE_WHILE_REFILL", "ITIM allocated while I$ refill") - ccover(itim_deallocated, "ITIM_DEALLOCATE", "ITIM deallocated") - ccover(itim_deallocated && refilling, "ITIM_DEALLOCATE_WHILE_REFILL", "ITIM deallocated while I$ refill") - ccover(itim_increase, "ITIM_SIZE_INCREASE", "ITIM size increased") - ccover(itim_increase && refilling, "ITIM_SIZE_INCREASE_WHILE_REFILL", "ITIM size increased while I$ refill") + // access data in 0 -> way - 2 allocate and enable, access data in way - 1(last way), deallocate. 
+ val enable = scratchpadWayValid(scratchpadWay(s1_aw.addr)) + //The address isn't in range, + when(!lineInScratchpad(scratchpadLine(s1_aw.addr))) { + scratchpadMax.get := scratchpadLine(s1_aw.addr) + invalidate := true.B } + scratchpadOn := enable + // val itim_allocated = !scratchpadOn && enable + // val itim_deallocated = scratchpadOn && !enable + // val itim_increase = scratchpadOn && enable && scratchpadLine(a.address) > scratchpadMax.get + // val refilling = refill_valid && refill_cnt > 0.U + // ccover(itim_allocated, "ITIM_ALLOCATE", "ITIM allocated") + // ccover(itim_allocated && refilling, "ITIM_ALLOCATE_WHILE_REFILL", "ITIM allocated while I$ refill") + // ccover(itim_deallocated, "ITIM_DEALLOCATE", "ITIM deallocated") + // ccover(itim_deallocated && refilling, "ITIM_DEALLOCATE_WHILE_REFILL", "ITIM deallocated while I$ refill") + // ccover(itim_increase, "ITIM_SIZE_INCREASE", "ITIM size increased") + // ccover(itim_increase && refilling, "ITIM_SIZE_INCREASE_WHILE_REFILL", "ITIM size increased while I$ refill") } assert(!s2_valid || RegNext(RegNext(s0_vaddr)) === io.s2_vaddr) when( - !(tl.a.valid || s1_slaveValid || s2_slaveValid || respValid) + !(axi.w.valid || s1_slaveValid || s2_slaveValid || respValid) && s2_valid && s2_data_decoded.error && !s2_tag_disparity ) { // handle correctable errors on CPU accesses to the scratchpad. @@ -767,127 +716,135 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP // back pressure is allowed on the [[tl]] // pull up [[respValid]] when [[s2_slaveValid]] until [[tl.d.fire]] - respValid := s2_slaveValid || (respValid && !tl.d.ready) + respValid := s2_slaveValid || (respValid && !axi.r.ready) // if [[s2_full_word_write]] will overwrite data, and [[s2_data_decoded.uncorrectable]] can be ignored. val respError = RegEnable(s2_scratchpad_hit && s2_data_decoded.uncorrectable && !s1s2_full_word_write, s2_slaveValid) when(s2_slaveValid) { // need stage 3 if Put or correct decoding. 
// @todo if uncorrectable [[s2_data_decoded]]? - when(edge_in.get.hasData(s1_a) || s2_data_decoded.error) { s3_slaveValid := true.B } + when(s2_slaveWriteValid || s2_data_decoded.error) { s3_slaveValid := true.B } /** data not masked by the TileLink PutData/PutPartialData. - * means data is stored at [[s1s3_slaveData]] which was read at stage 1. - */ - def byteEn(i: Int) = !(edge_in.get.hasData(s1_a) && s1_a.mask(i)) + * means data is stored at [[s1s3_slaveData]] which was read at stage 1. + */ + def byteEn(i: Int) = !axi.w.bits.strb(i) // write [[s1s3_slaveData]] based on index of wordBits. // @todo seems a problem here? // granularity of CPU fetch is `wordBits/8`, // granularity of TileLink access is `TLBundleParameters.dataBits/8` // these two granularity can be different. // store data read from RAM - s1s3_slaveData := (0 until wordBits / 8) + s1s3_slaveData := VecInit((0 until wordBits / 8) .map(i => Mux(byteEn(i), s2_data_decoded.corrected, s1s3_slaveData)(8 * (i + 1) - 1, 8 * i)) - .asUInt + ).asUInt } - tl.d.valid := respValid - tl.d.bits := Mux( - edge_in.get.hasData(s1_a), - // PutData/PutPartialData -> AccessAck - edge_in.get.AccessAck(s1_a), - // Get -> AccessAckData - edge_in.get.AccessAck(s1_a, 0.U, denied = false.B, corrupt = respError) - ) - tl.d.bits.data := s1s3_slaveData + axi.r.valid := respValid + // tl.d.bits := Mux( + // edge_in.get.hasData(s1_a), + // // PutData/PutPartialData -> AccessAck + // edge_in.get.AccessAck(s1_a), + // // Get -> AccessAckData + // edge_in.get.AccessAck(s1_a, 0.U, denied = false.B, corrupt = respError) + // ) + axi.r.bits := DontCare + axi.r.bits.data := s1s3_slaveData + axi.r.bits.last := true.B // Tie off unused channels - tl.b.valid := false.B - tl.c.ready := true.B - tl.e.ready := true.B - - ccover(s0_valid && s1_slaveValid, "CONCURRENT_ITIM_ACCESS_1", "ITIM accessed, then I$ accessed next cycle") - ccover( - s0_valid && s2_slaveValid, - "CONCURRENT_ITIM_ACCESS_2", - "ITIM accessed, then I$ accessed two cycles 
later" - ) - ccover(tl.d.valid && !tl.d.ready, "ITIM_D_STALL", "ITIM response blocked by D-channel") - ccover(tl_out.d.valid && !tl_out.d.ready, "ITIM_BLOCK_D", "D-channel blocked by ITIM access") + axi.b.valid := false.B + + // ccover(s0_valid && s1_slaveValid, "CONCURRENT_ITIM_ACCESS_1", "ITIM accessed, then I$ accessed next cycle") + // ccover( + // s0_valid && s2_slaveValid, + // "CONCURRENT_ITIM_ACCESS_2", + // "ITIM accessed, then I$ accessed two cycles later" + // ) + // ccover(tl.d.valid && !tl.d.ready, "ITIM_D_STALL", "ITIM response blocked by D-channel") + // ccover(tl_out.d.valid && !tl_out.d.ready, "ITIM_BLOCK_D", "D-channel blocked by ITIM access") } } - tl_out.a.valid := s2_request_refill - tl_out.a.bits := edge_out - .Get(fromSource = 0.U, toAddress = (refill_paddr >> blockOffBits) << blockOffBits, lgSize = lgCacheBlockBytes.U) - ._2 + io.instructionFetchAXI.ar.valid := s2_request_refill + io.instructionFetchAXI.ar.bits := DontCare + io.instructionFetchAXI.ar.bits.id := 0.U + io.instructionFetchAXI.ar.bits.addr := (refill_paddr >> blockOffBits) << blockOffBits + io.instructionFetchAXI.ar.bits.size := log2Up(parameter.blockBytes).U + io.instructionFetchAXI.ar.bits.len := 0.U + io.instructionFetchAXI.ar.bits.burst := 1.U // prefetch when next-line access does not cross a page if (cacheParams.prefetch) { /** [[crosses_page]] indicate if there is a crosses page access - * [[next_block]] : the address to be prefetched. - */ + * [[next_block]] : the address to be prefetched. + */ val (crosses_page, next_block) = Split(refill_paddr(pgIdxBits - 1, blockOffBits) +& 1.U, pgIdxBits - blockOffBits) + // AXI Hint via AxCache ? + + // when(tl_out.a.fire) { + // send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page + // when(send_hint) { + // send_hint := false.B + // hint_outstanding := true.B + // } + // } + // + // // @todo why refill_done will kill hint at this cycle? 
+ // when(refill_done) { + // send_hint := false.B + // } - when(tl_out.a.fire) { - send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page - when(send_hint) { - send_hint := false.B - hint_outstanding := true.B - } - } - - // @todo why refill_done will kill hint at this cycle? - when(refill_done) { - send_hint := false.B - } // D channel reply with HintAck. - when(tl_out.d.fire && !refill_one_beat) { - hint_outstanding := false.B - } - - when(send_hint) { - tl_out.a.valid := true.B - tl_out.a.bits := edge_out - .Hint( - fromSource = 1.U, - toAddress = Cat(refill_paddr >> pgIdxBits, next_block) << blockOffBits, - lgSize = lgCacheBlockBytes.U, - param = TLHints.PREFETCH_READ - ) - ._2 - } - - ccover(send_hint && !tl_out.a.ready, "PREFETCH_A_STALL", "I$ prefetch blocked by A-channel") - ccover( - refill_valid && (tl_out.d.fire && !refill_one_beat), - "PREFETCH_D_BEFORE_MISS_D", - "I$ prefetch resolves before miss" - ) - ccover( - !refill_valid && (tl_out.d.fire && !refill_one_beat), - "PREFETCH_D_AFTER_MISS_D", - "I$ prefetch resolves after miss" - ) - ccover(tl_out.a.fire && hint_outstanding, "PREFETCH_D_AFTER_MISS_A", "I$ prefetch resolves after second miss") + // when(tl_out.d.fire && !refill_one_beat) { + // hint_outstanding := false.B + // } + + // when(send_hint) { + // tl_out.a.valid := true.B + // tl_out.a.bits := edge_out + // .Hint( + // fromSource = 1.U, + // toAddress = Cat(refill_paddr >> pgIdxBits, next_block) << blockOffBits, + // lgSize = lgCacheBlockBytes.U, + // param = TLHints.PREFETCH_READ + // ) + // ._2 + // } + + // ccover(send_hint && !tl_out.a.ready, "PREFETCH_A_STALL", "I$ prefetch blocked by A-channel") + // ccover( + // refill_valid && (tl_out.d.fire && !refill_one_beat), + // "PREFETCH_D_BEFORE_MISS_D", + // "I$ prefetch resolves before miss" + // ) + // ccover( + // !refill_valid && (tl_out.d.fire && !refill_one_beat), + // "PREFETCH_D_AFTER_MISS_D", + // "I$ prefetch resolves after miss" + // ) + // ccover(tl_out.a.fire && 
hint_outstanding, "PREFETCH_D_AFTER_MISS_A", "I$ prefetch resolves after second miss") } // Drive APROT information - tl_out.a.bits.user.lift(AMBAProt).foreach { x => - // Rocket caches all fetch requests, and it's difficult to differentiate privileged/unprivileged on - // cached data, so mark as privileged - x.fetch := true.B - x.secure := true.B - x.privileged := true.B - x.bufferable := true.B - x.modifiable := true.B - x.readalloc := io.s2_cacheable - x.writealloc := io.s2_cacheable - } - tl_out.b.ready := true.B - tl_out.c.valid := false.B - tl_out.e.valid := false.B - assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address))) + // bufferable ## modifiable ## readalloc ## writealloc ## privileged ## secure ## fetch + io.instructionFetchAXI.ar.bits.user := true.B ## true.B ## io.s2_cacheable ## io.s2_cacheable ## + true.B ## true.B ## true.B + // tl_out.a.bits.user.lift(AMBAProt).foreach { x => + // // Rocket caches all fetch requests, and it's difficult to differentiate privileged/unprivileged on + // // cached data, so mark as privileged + // x.fetch := true.B + // x.secure := true.B + // x.privileged := true.B + // x.bufferable := true.B + // x.modifiable := true.B + // x.readalloc := io.s2_cacheable + // x.writealloc := io.s2_cacheable + // } + // tl_out.b.ready := true.B + // tl_out.c.valid := false.B + // tl_out.e.valid := false.B + assert(!(io.instructionFetchAXI.ar.valid && addrMaybeInScratchpad(io.instructionFetchAXI.ar.bits.addr))) // if there is an outstanding refill, cannot flush I$. when(!refill_valid) { invalidated := false.B } @@ -897,74 +854,77 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) with HasL1ICacheP io.perf.acquire := refill_fire // don't gate I$ clock since there are outstanding transcations. 
io.keep_clock_enabled := - tl_in - .map(tl => tl.a.valid || tl.d.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid) + io.itimAXI + .map(axi => + axi.ar.valid || axi.aw.valid || axi.w.valid // tl.a.valid + || axi.r.valid //tl.d.valid + || s1_slaveValid || s2_slaveValid || s3_slaveValid) .getOrElse(false.B) || // ITIM s1_valid || s2_valid || refill_valid || send_hint || hint_outstanding // I$ /** index to access [[data_arrays]] and [[tag_array]]. - * @note - * if [[untagBits]] > [[pgIdxBits]] in - * {{{ - * ┌──idxBits──┐ - * ↓ ↓ - * │ tag │ set │offset│ - * │ pageTag │ pageIndex│ - * ↑ ↑ ↑ │ - * untagBits│ blockOffBits│ - * pgIdxBits │ - * └msb┴──lsb──┘ - * vaddr paddr - * }}} - * - * else use paddr directly. - * Note: if [[untagBits]] > [[pgIdxBits]], there will be a alias issue which isn't addressend by the icache yet. - */ + * @note + * if [[untagBits]] > [[pgIdxBits]] in + * {{{ + * ┌──idxBits──┐ + * ↓ ↓ + * │ tag │ set │offset│ + * │ pageTag │ pageIndex│ + * ↑ ↑ ↑ │ + * untagBits│ blockOffBits│ + * pgIdxBits │ + * └msb┴──lsb──┘ + * vaddr paddr + * }}} + * + * else use paddr directly. + * Note: if [[untagBits]] > [[pgIdxBits]], there will be a alias issue which isn't addressend by the icache yet. + */ def index(vaddr: UInt, paddr: UInt) = { /** [[paddr]] as LSB to be used for VIPT. */ val lsbs = paddr(pgUntagBits - 1, blockOffBits) /** if [[untagBits]] > [[pgIdxBits]], append [[vaddr]] to higher bits of index as [[msbs]]. 
*/ - val msbs = (idxBits + blockOffBits > pgUntagBits).option(vaddr(idxBits + blockOffBits - 1, pgUntagBits)) - msbs ## lsbs + val msbs = Option.when(idxBits + blockOffBits > pgUntagBits)(vaddr(idxBits + blockOffBits - 1, pgUntagBits)) + msbs.map(_ ## lsbs).getOrElse(lsbs) } - ccover(!send_hint && (tl_out.a.valid && !tl_out.a.ready), "MISS_A_STALL", "I$ miss blocked by A-channel") - ccover(invalidate && refill_valid, "FLUSH_DURING_MISS", "I$ flushed during miss") - - def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"ICACHE_$label", "MemorySystem;;" + desc) - - val mem_active_valid = Seq(property.CoverBoolean(s2_valid, Seq("mem_active"))) - val data_error = Seq( - property.CoverBoolean(!s2_data_decoded.correctable && !s2_data_decoded.uncorrectable, Seq("no_data_error")), - property.CoverBoolean(s2_data_decoded.correctable, Seq("data_correctable_error")), - property.CoverBoolean(s2_data_decoded.uncorrectable, Seq("data_uncorrectable_error")) - ) - val request_source = Seq( - property.CoverBoolean(!s2_slaveValid, Seq("from_CPU")), - property.CoverBoolean(s2_slaveValid, Seq("from_TL")) - ) - val tag_error = Seq( - property.CoverBoolean(!s2_tag_disparity, Seq("no_tag_error")), - property.CoverBoolean(s2_tag_disparity, Seq("tag_error")) - ) - val mem_mode = Seq( - property.CoverBoolean(s2_scratchpad_hit, Seq("ITIM_mode")), - property.CoverBoolean(!s2_scratchpad_hit, Seq("cache_mode")) - ) - - val error_cross_covers = new property.CrossProperty( - Seq(mem_active_valid, data_error, tag_error, request_source, mem_mode), - Seq( - // tag error cannot occur in ITIM mode - Seq("tag_error", "ITIM_mode"), - // Can only respond to TL in ITIM mode - Seq("from_TL", "cache_mode") - ), - "MemorySystem;;Memory Bit Flip Cross Covers" - ) - - property.cover(error_cross_covers) + // ccover(!send_hint && (tl_out.a.valid && !tl_out.a.ready), "MISS_A_STALL", "I$ miss blocked by A-channel") + // ccover(invalidate && refill_valid, 
"FLUSH_DURING_MISS", "I$ flushed during miss") + + // def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + // property.cover(cond, s"ICACHE_$label", "MemorySystem;;" + desc) + // + // val mem_active_valid = Seq(property.CoverBoolean(s2_valid, Seq("mem_active"))) + // val data_error = Seq( + // property.CoverBoolean(!s2_data_decoded.correctable && !s2_data_decoded.uncorrectable, Seq("no_data_error")), + // property.CoverBoolean(s2_data_decoded.correctable, Seq("data_correctable_error")), + // property.CoverBoolean(s2_data_decoded.uncorrectable, Seq("data_uncorrectable_error")) + // ) + // val request_source = Seq( + // property.CoverBoolean(!s2_slaveValid, Seq("from_CPU")), + // property.CoverBoolean(s2_slaveValid, Seq("from_TL")) + // ) + // val tag_error = Seq( + // property.CoverBoolean(!s2_tag_disparity, Seq("no_tag_error")), + // property.CoverBoolean(s2_tag_disparity, Seq("tag_error")) + // ) + // val mem_mode = Seq( + // property.CoverBoolean(s2_scratchpad_hit, Seq("ITIM_mode")), + // property.CoverBoolean(!s2_scratchpad_hit, Seq("cache_mode")) + // ) + + // val error_cross_covers = new property.CrossProperty( + // Seq(mem_active_valid, data_error, tag_error, request_source, mem_mode), + // Seq( + // // tag error cannot occur in ITIM mode + // Seq("tag_error", "ITIM_mode"), + // // Can only respond to TL in ITIM mode + // Seq("from_TL", "cache_mode") + // ), + // "MemorySystem;;Memory Bit Flip Cross Covers" + // ) + // + // property.cover(error_cross_covers) } From 1d14f19c6454e1da1327499af6d39ed4cf5b21a6 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Fri, 28 Jun 2024 13:44:14 +0800 Subject: [PATCH 063/140] [rocketv] add elaborator for ICache - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.ICache config --vaddrBits 32 --paddrBits 32 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.ICache design --parameter ./ICache.json --run-firtool --- 
elaborator/src/rocketv/ICache.scala | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 elaborator/src/rocketv/ICache.scala diff --git a/elaborator/src/rocketv/ICache.scala b/elaborator/src/rocketv/ICache.scala new file mode 100644 index 000000000..e80b84360 --- /dev/null +++ b/elaborator/src/rocketv/ICache.scala @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{ICache, ICacheParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object ICache extends Elaborator { + @main + case class ICacheParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "prefetch") prefetch: Boolean, + @arg(name = "nSets") nSets: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "blockBytes") blockBytes: Int, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "vaddrBits") vaddrBits: Int, + @arg(name = "paddrBits") paddrBits: Int) { + def convert: ICacheParameter = ICacheParameter( + useAsyncReset, + prefetch, + nSets, + nWays, + blockBytes, + usingVM, + vaddrBits, + paddrBits + ) + } + + implicit def ICacheParameterMainParser: ParserForClass[ICacheParameterMain] = ParserForClass[ICacheParameterMain] + + @main + def config(@arg(name = "parameter") parameter: ICacheParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[ICache, ICacheParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 67dcb3ba7967cc8f34569a278ad328d0af1f0f1f Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 03:45:42 +0800 Subject: [PATCH 064/140] [rocketv] copy FPU into rocketv project --- rocketv/src/FPU.scala | 752 ++++++++++++++++++++++++++ rocketv/src/fpu/FPToFP.scala | 
68 +++ rocketv/src/fpu/FPToInt.scala | 82 +++ rocketv/src/fpu/FPUFMAPipe.scala | 45 ++ rocketv/src/fpu/IntToFP.scala | 53 ++ rocketv/src/fpu/MulAddRecFNPipe.scala | 71 +++ 6 files changed, 1071 insertions(+) create mode 100644 rocketv/src/FPU.scala create mode 100644 rocketv/src/fpu/FPToFP.scala create mode 100644 rocketv/src/fpu/FPToInt.scala create mode 100644 rocketv/src/fpu/FPUFMAPipe.scala create mode 100644 rocketv/src/fpu/IntToFP.scala create mode 100644 rocketv/src/fpu/MulAddRecFNPipe.scala diff --git a/rocketv/src/FPU.scala b/rocketv/src/FPU.scala new file mode 100644 index 000000000..52e544f4c --- /dev/null +++ b/rocketv/src/FPU.scala @@ -0,0 +1,752 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +package freechips.rocketchip.tile + +import chisel3._ +import chisel3.util._ +import chisel3.{DontCare, WireInit, withClock, withReset} +import chisel3.experimental.SourceInfo +import chisel3.experimental.dataview._ +import org.chipsalliance.cde.config.Parameters +import freechips.rocketchip.rocket._ +import freechips.rocketchip.rocket.Instructions._ +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property + +case class FPUParams( + minFLen: Int = 32, + fLen: Int = 64, + divSqrt: Boolean = true, + sfmaLatency: Int = 3, + dfmaLatency: Int = 4 +) + +object FPConstants +{ + val RM_SZ = 3 + val FLAGS_SZ = 5 +} + +trait HasFPUCtrlSigs { + val ldst = Bool() + val wen = Bool() + val ren1 = Bool() + val ren2 = Bool() + val ren3 = Bool() + val swap12 = Bool() + val swap23 = Bool() + val typeTagIn = UInt(2.W) + val typeTagOut = UInt(2.W) + val fromint = Bool() + val toint = Bool() + val fastpipe = Bool() + val fma = Bool() + val div = Bool() + val sqrt = Bool() + val wflags = Bool() +} + +class FPUCtrlSigs extends Bundle with HasFPUCtrlSigs + +class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { + val io = IO(new Bundle { + val inst = Input(Bits(32.W)) + val sigs = Output(new FPUCtrlSigs()) 
+ }) + + private val X2 = BitPat.dontCare(2) + + val default = List(X,X,X,X,X,X,X,X2,X2,X,X,X,X,X,X,X) + val h: Array[(BitPat, List[BitPat])] = + Array(FLH -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), + FSH -> List(Y,N,N,Y,N,Y,X, I, H,N,Y,N,N,N,N,N), + FMV_H_X -> List(N,Y,N,N,N,X,X, H, I,Y,N,N,N,N,N,N), + FCVT_H_W -> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), + FCVT_H_WU-> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), + FCVT_H_L -> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), + FCVT_H_LU-> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), + FMV_X_H -> List(N,N,Y,N,N,N,X, I, H,N,Y,N,N,N,N,N), + FCLASS_H -> List(N,N,Y,N,N,N,X, H, H,N,Y,N,N,N,N,N), + FCVT_W_H -> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), + FCVT_WU_H-> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), + FCVT_L_H -> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), + FCVT_LU_H-> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), + FCVT_S_H -> List(N,Y,Y,N,N,N,X, H, S,N,N,Y,N,N,N,Y), + FCVT_H_S -> List(N,Y,Y,N,N,N,X, S, H,N,N,Y,N,N,N,Y), + FEQ_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), + FLT_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), + FLE_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), + FSGNJ_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), + FSGNJN_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), + FSGNJX_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), + FMIN_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,Y), + FMAX_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,Y), + FADD_H -> List(N,Y,Y,Y,N,N,Y, H, H,N,N,N,Y,N,N,Y), + FSUB_H -> List(N,Y,Y,Y,N,N,Y, H, H,N,N,N,Y,N,N,Y), + FMUL_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,N,Y,N,N,Y), + FMADD_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), + FMSUB_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), + FNMADD_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), + FNMSUB_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), + FDIV_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,N,N,Y,N,Y), + FSQRT_H -> List(N,Y,Y,N,N,N,X, H, H,N,N,N,N,N,Y,Y)) + val f: Array[(BitPat, List[BitPat])] = + Array(FLW -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), + FSW -> List(Y,N,N,Y,N,Y,X, 
I, S,N,Y,N,N,N,N,N), + FMV_W_X -> List(N,Y,N,N,N,X,X, S, I,Y,N,N,N,N,N,N), + FCVT_S_W -> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), + FCVT_S_WU-> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), + FCVT_S_L -> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), + FCVT_S_LU-> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), + FMV_X_W -> List(N,N,Y,N,N,N,X, I, S,N,Y,N,N,N,N,N), + FCLASS_S -> List(N,N,Y,N,N,N,X, S, S,N,Y,N,N,N,N,N), + FCVT_W_S -> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), + FCVT_WU_S-> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), + FCVT_L_S -> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), + FCVT_LU_S-> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), + FEQ_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), + FLT_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), + FLE_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), + FSGNJ_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), + FSGNJN_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), + FSGNJX_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), + FMIN_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,Y), + FMAX_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,Y), + FADD_S -> List(N,Y,Y,Y,N,N,Y, S, S,N,N,N,Y,N,N,Y), + FSUB_S -> List(N,Y,Y,Y,N,N,Y, S, S,N,N,N,Y,N,N,Y), + FMUL_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,N,Y,N,N,Y), + FMADD_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), + FMSUB_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), + FNMADD_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), + FNMSUB_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), + FDIV_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,N,N,Y,N,Y), + FSQRT_S -> List(N,Y,Y,N,N,N,X, S, S,N,N,N,N,N,Y,Y)) + val d: Array[(BitPat, List[BitPat])] = + Array(FLD -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), + FSD -> List(Y,N,N,Y,N,Y,X, I, D,N,Y,N,N,N,N,N), + FMV_D_X -> List(N,Y,N,N,N,X,X, D, I,Y,N,N,N,N,N,N), + FCVT_D_W -> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), + FCVT_D_WU-> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), + FCVT_D_L -> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), + FCVT_D_LU-> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), + FMV_X_D -> List(N,N,Y,N,N,N,X, I, 
D,N,Y,N,N,N,N,N), + FCLASS_D -> List(N,N,Y,N,N,N,X, D, D,N,Y,N,N,N,N,N), + FCVT_W_D -> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), + FCVT_WU_D-> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), + FCVT_L_D -> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), + FCVT_LU_D-> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), + FCVT_S_D -> List(N,Y,Y,N,N,N,X, D, S,N,N,Y,N,N,N,Y), + FCVT_D_S -> List(N,Y,Y,N,N,N,X, S, D,N,N,Y,N,N,N,Y), + FEQ_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), + FLT_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), + FLE_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), + FSGNJ_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), + FSGNJN_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), + FSGNJX_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), + FMIN_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,Y), + FMAX_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,Y), + FADD_D -> List(N,Y,Y,Y,N,N,Y, D, D,N,N,N,Y,N,N,Y), + FSUB_D -> List(N,Y,Y,Y,N,N,Y, D, D,N,N,N,Y,N,N,Y), + FMUL_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,N,Y,N,N,Y), + FMADD_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), + FMSUB_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), + FNMADD_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), + FNMSUB_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), + FDIV_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,N,N,Y,N,Y), + FSQRT_D -> List(N,Y,Y,N,N,N,X, D, D,N,N,N,N,N,Y,Y)) + val fcvt_hd: Array[(BitPat, List[BitPat])] = + Array(FCVT_H_D -> List(N,Y,Y,N,N,N,X, D, H,N,N,Y,N,N,N,Y), + FCVT_D_H -> List(N,Y,Y,N,N,N,X, H, D,N,N,Y,N,N,N,Y)) + + val insns = (minFLen, fLen) match { + case (32, 32) => f + case (16, 32) => h ++ f + case (32, 64) => f ++ d + case (16, 64) => h ++ f ++ d ++ fcvt_hd + + case other => throw new Exception(s"minFLen = ${minFLen} & fLen = ${fLen} is an unsupported configuration") + } + val decoder = DecodeLogic(io.inst, default, insns) + val s = io.sigs + val sigs = Seq(s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, + s.swap23, s.typeTagIn, s.typeTagOut, s.fromint, s.toint, + s.fastpipe, s.fma, s.div, s.sqrt, s.wflags) + sigs zip 
decoder map {case(s,d) => s := d} +} + +class FPUCoreIO(implicit p: Parameters) extends CoreBundle()(p) { + val hartid = Input(UInt(hartIdLen.W)) + val time = Input(UInt(xLen.W)) + + val inst = Input(Bits(32.W)) + val fromint_data = Input(Bits(xLen.W)) + + val fcsr_rm = Input(Bits(FPConstants.RM_SZ.W)) + val fcsr_flags = Valid(Bits(FPConstants.FLAGS_SZ.W)) + + val store_data = Output(Bits(fLen.W)) + val toint_data = Output(Bits(xLen.W)) + + val dmem_resp_val = Input(Bool()) + val dmem_resp_type = Input(Bits(3.W)) + val dmem_resp_tag = Input(UInt(5.W)) + val dmem_resp_data = Input(Bits(fLen.W)) + + val valid = Input(Bool()) + val fcsr_rdy = Output(Bool()) + val nack_mem = Output(Bool()) + val illegal_rm = Output(Bool()) + val killx = Input(Bool()) + val killm = Input(Bool()) + val dec = Output(new FPUCtrlSigs()) + val sboard_set = Output(Bool()) + val sboard_clr = Output(Bool()) + val sboard_clra = Output(UInt(5.W)) + + val keep_clock_enabled = Input(Bool()) +} + +class FPUIO(implicit p: Parameters) extends FPUCoreIO ()(p) { + val cp_req = Flipped(Decoupled(new FPInput())) //cp doesn't pay attn to kill sigs + val cp_resp = Decoupled(new FPResult()) +} + +class FPResult(implicit p: Parameters) extends CoreBundle()(p) { + val data = Bits((fLen+1).W) + val exc = Bits(FPConstants.FLAGS_SZ.W) +} + +class IntToFPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs { + val rm = Bits(FPConstants.RM_SZ.W) + val typ = Bits(2.W) + val in1 = Bits(xLen.W) +} + +class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs { + val rm = Bits(FPConstants.RM_SZ.W) + val fmaCmd = Bits(2.W) + val typ = Bits(2.W) + val fmt = Bits(2.W) + val in1 = Bits((fLen+1).W) + val in2 = Bits((fLen+1).W) + val in3 = Bits((fLen+1).W) + +} + +case class FType(exp: Int, sig: Int) { + def ieeeWidth = exp + sig + def recodedWidth = ieeeWidth + 1 + + def ieeeQNaN = ((BigInt(1) << (ieeeWidth - 1)) - (BigInt(1) << (sig - 2))).U(ieeeWidth.W) + def qNaN = ((BigInt(7) 
<< (exp + sig - 3)) + (BigInt(1) << (sig - 2))).U(recodedWidth.W) + def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR + def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2) + + def classify(x: UInt) = { + val sign = x(sig + exp) + val code = x(exp + sig - 1, exp + sig - 3) + val codeHi = code(2, 1) + val isSpecial = codeHi === 3.U + + val isHighSubnormalIn = x(exp + sig - 3, sig - 1) < 2.U + val isSubnormal = code === 1.U || codeHi === 1.U && isHighSubnormalIn + val isNormal = codeHi === 1.U && !isHighSubnormalIn || codeHi === 2.U + val isZero = code === 0.U + val isInf = isSpecial && !code(0) + val isNaN = code.andR + val isSNaN = isNaN && !x(sig-2) + val isQNaN = isNaN && x(sig-2) + + Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign, + isSubnormal && !sign, isZero && !sign, isZero && sign, + isSubnormal && sign, isNormal && sign, isInf && sign) + } + + // convert between formats, ignoring rounding, range, NaN + def unsafeConvert(x: UInt, to: FType) = if (this == to) x else { + val sign = x(sig + exp) + val fractIn = x(sig - 2, 0) + val expIn = x(sig + exp - 1, sig - 1) + val fractOut = fractIn << to.sig >> sig + val expOut = { + val expCode = expIn(exp, exp - 2) + val commonCase = (expIn + (1 << to.exp).U) - (1 << exp).U + Mux(expCode === 0.U || expCode >= 6.U, Cat(expCode, commonCase(to.exp - 3, 0)), commonCase(to.exp, 0)) + } + Cat(sign, expOut, fractOut) + } + + private def ieeeBundle = { + val expWidth = exp + class IEEEBundle extends Bundle { + val sign = Bool() + val exp = UInt(expWidth.W) + val sig = UInt((ieeeWidth-expWidth-1).W) + } + new IEEEBundle + } + + def unpackIEEE(x: UInt) = x.asTypeOf(ieeeBundle) + + def recode(x: UInt) = hardfloat.recFNFromFN(exp, sig, x) + def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x) +} + +object FType { + val H = new FType(5, 11) + val S = new FType(8, 24) + val D = new FType(11, 53) + + val all = List(H, S, D) +} + +trait HasFPUParameters { + require(fLen == 0 || FType.all.exists(_.ieeeWidth == fLen)) + val 
minFLen: Int + val fLen: Int + def xLen: Int + val minXLen = 32 + val nIntTypes = log2Ceil(xLen/minXLen) + 1 + def floatTypes = FType.all.filter(t => minFLen <= t.ieeeWidth && t.ieeeWidth <= fLen) + def minType = floatTypes.head + def maxType = floatTypes.last + def prevType(t: FType) = floatTypes(typeTag(t) - 1) + def maxExpWidth = maxType.exp + def maxSigWidth = maxType.sig + def typeTag(t: FType) = floatTypes.indexOf(t) + def typeTagWbOffset = (FType.all.indexOf(minType) + 1).U + def typeTagGroup(t: FType) = (if (floatTypes.contains(t)) typeTag(t) else typeTag(maxType)).U + // typeTag + def H = typeTagGroup(FType.H) + def S = typeTagGroup(FType.S) + def D = typeTagGroup(FType.D) + def I = typeTag(maxType).U + + private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR + + private def box(x: UInt, xt: FType, y: UInt, yt: FType): UInt = { + require(xt.ieeeWidth == 2 * yt.ieeeWidth) + val swizzledNaN = Cat( + x(xt.sig + xt.exp, xt.sig + xt.exp - 3), + x(xt.sig - 2, yt.recodedWidth - 1).andR, + x(xt.sig + xt.exp - 5, xt.sig), + y(yt.recodedWidth - 2), + x(xt.sig - 2, yt.recodedWidth - 1), + y(yt.recodedWidth - 1), + y(yt.recodedWidth - 3, 0)) + Mux(xt.isNaN(x), swizzledNaN, x) + } + + // implement NaN unboxing for FU inputs + def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = { + val outType = exactType.getOrElse(maxType) + def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = { + val prev = + if (t == minType) { + Seq() + } else { + val prevT = prevType(t) + val unswizzled = Cat( + x(prevT.sig + prevT.exp - 1), + x(t.sig - 1), + x(prevT.sig + prevT.exp - 2, 0)) + val prev = helper(unswizzled, prevT) + val isbox = isBox(x, t) + prev.map(p => (isbox && p._1, p._2)) + } + prev :+ (true.B, t.unsafeConvert(x, outType)) + } + + val (oks: Seq[Bool], floats) = helper(x, maxType).unzip + if (exactType.isEmpty || floatTypes.size == 1) { + Mux(oks(tag), floats(tag), maxType.qNaN) + } else { + val t = exactType.get + floats(typeTag(t)) | 
Mux(oks(typeTag(t)), 0.U, t.qNaN) + } + } + + // make sure that the redundant bits in the NaN-boxed encoding are consistent + def consistent(x: UInt): Bool = { + def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B else { + val prevT = prevType(t) + val unswizzled = Cat( + x(prevT.sig + prevT.exp - 1), + x(t.sig - 1), + x(prevT.sig + prevT.exp - 2, 0)) + val prevOK = !isBox(x, t) || helper(unswizzled, prevT) + val curOK = !t.isNaN(x) || x(t.sig + t.exp - 4) === x(t.sig - 2, prevT.recodedWidth - 1).andR + prevOK && curOK + } + helper(x, maxType) + } + + // generate a NaN box from an FU result + def box(x: UInt, t: FType): UInt = { + if (t == maxType) { + x + } else { + val nt = floatTypes(typeTag(t) + 1) + val bigger = box(((BigInt(1) << nt.recodedWidth)-1).U, nt, x, t) + bigger | ((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth)).U + } + } + + // generate a NaN box from an FU result + def box(x: UInt, tag: UInt): UInt = { + val opts = floatTypes.map(t => box(x, t)) + opts(tag) + } + + // zap bits that hardfloat thinks are don't-cares, but we do care about + def sanitizeNaN(x: UInt, t: FType): UInt = { + if (typeTag(t) == 0) { + x + } else { + val maskedNaN = x & ~((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4))).U(t.recodedWidth.W) + Mux(t.isNaN(x), maskedNaN, x) + } + } + + // implement NaN boxing and recoding for FL*/fmv.*.x + def recode(x: UInt, tag: UInt): UInt = { + def helper(x: UInt, t: FType): UInt = { + if (typeTag(t) == 0) { + t.recode(x) + } else { + val prevT = prevType(t) + box(t.recode(x), t, helper(x, prevT), prevT) + } + } + + // fill MSBs of subword loads to emulate a wider load of a NaN-boxed value + val boxes = floatTypes.map(t => ((BigInt(1) << maxType.ieeeWidth) - (BigInt(1) << t.ieeeWidth)).U) + helper(boxes(tag) | x, maxType) + } + + // implement NaN unboxing and un-recoding for FS*/fmv.x.* + def ieee(x: UInt, t: FType = maxType): UInt = { + if (typeTag(t) == 0) { + t.ieee(x) + } else { + val 
unrecoded = t.ieee(x) + val prevT = prevType(t) + val prevRecoded = Cat( + x(prevT.recodedWidth-2), + x(t.sig-1), + x(prevT.recodedWidth-3, 0)) + val prevUnrecoded = ieee(prevRecoded, prevT) + Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth-1, 0))) + } + } +} + +abstract class FPUModule(implicit val p: Parameters) extends Module with HasCoreParameters with HasFPUParameters + +class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { + val io = IO(new FPUIO) + + val (useClockGating, useDebugROB) = coreParams match { + case r: RocketCoreParams => + val sz = if (r.debugROB.isDefined) r.debugROB.get.size else 1 + (r.clockGate, sz < 1) + case _ => (false, false) + } + val clock_en_reg = Reg(Bool()) + val clock_en = clock_en_reg || io.cp_req.valid + val gated_clock = + if (!useClockGating) clock + else ClockGate(clock, clock_en, "fpu_clock_gate") + + val fp_decoder = Module(new FPUDecoder) + fp_decoder.io.inst := io.inst + val id_ctrl = fp_decoder.io.sigs + + val ex_reg_valid = RegNext(io.valid, false.B) + val ex_reg_inst = RegEnable(io.inst, io.valid) + val ex_reg_ctrl = RegEnable(id_ctrl, io.valid) + val ex_ra = List.fill(3)(Reg(UInt())) + + // load response + val load_wb = RegNext(io.dmem_resp_val) + val load_wb_typeTag = RegEnable(io.dmem_resp_type(1,0) - typeTagWbOffset, io.dmem_resp_val) + val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val) + val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val) + + class FPUImpl { // entering gated-clock domain + + val req_valid = ex_reg_valid || io.cp_req.valid + val ex_cp_valid = io.cp_req.fire + val mem_cp_valid = RegNext(ex_cp_valid, false.B) + val wb_cp_valid = RegNext(mem_cp_valid, false.B) + val mem_reg_valid = RegInit(false.B) + val killm = (io.killm || io.nack_mem) && !mem_cp_valid + // Kill X-stage instruction if M-stage is killed. 
This prevents it from + // speculatively being sent to the div-sqrt unit, which can cause priority + // inversion for two back-to-back divides, the first of which is killed. + val killx = io.killx || mem_reg_valid && killm + mem_reg_valid := ex_reg_valid && !killx || ex_cp_valid + val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) + val wb_reg_valid = RegNext(mem_reg_valid && (!killm || mem_cp_valid), false.B) + + val cp_ctrl = Wire(new FPUCtrlSigs) + cp_ctrl :<>= io.cp_req.bits.viewAsSupertype(new FPUCtrlSigs) + io.cp_resp.valid := false.B + io.cp_resp.bits.data := 0.U + io.cp_resp.bits.exc := DontCare + + val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, ex_reg_ctrl) + val mem_ctrl = RegEnable(ex_ctrl, req_valid) + val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) + + // CoreMonitorBundle to monitor fp register file writes + val frfWriteBundle = Seq.fill(2)(WireInit(new CoreMonitorBundle(xLen, fLen), DontCare)) + frfWriteBundle.foreach { i => + i.clock := clock + i.reset := reset + i.hartid := io.hartid + i.timer := io.time(31,0) + i.valid := false.B + i.wrenx := false.B + i.wrenf := false.B + i.excpt := false.B + } + + // regfile + val regfile = Mem(32, Bits((fLen+1).W)) + when (load_wb) { + val wdata = recode(load_wb_data, load_wb_typeTag) + regfile(load_wb_tag) := wdata + assert(consistent(wdata)) + if (enableCommitLog) + printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32.U, ieee(wdata)) + if (useDebugROB) + DebugROB.pushWb(clock, reset, io.hartid, load_wb, load_wb_tag + 32.U, ieee(wdata)) + frfWriteBundle(0).wrdst := load_wb_tag + frfWriteBundle(0).wrenf := true.B + frfWriteBundle(0).wrdata := ieee(wdata) + } + + val ex_rs = ex_ra.map(a => regfile(a)) + when (io.valid) { + when (id_ctrl.ren1) { + when (!id_ctrl.swap12) { ex_ra(0) := io.inst(19,15) } + when (id_ctrl.swap12) { ex_ra(1) := io.inst(19,15) } + } + when (id_ctrl.ren2) { + when (id_ctrl.swap12) { ex_ra(0) := io.inst(24,20) } + when (id_ctrl.swap23) { ex_ra(2) := io.inst(24,20) } + when 
(!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.inst(24,20) } + } + when (id_ctrl.ren3) { ex_ra(2) := io.inst(31,27) } + } + val ex_rm = Mux(ex_reg_inst(14,12) === 7.U, io.fcsr_rm, ex_reg_inst(14,12)) + + def fuInput(minT: Option[FType]): FPInput = { + val req = Wire(new FPInput) + val tag = ex_ctrl.typeTagIn + req.viewAsSupertype(new Bundle with HasFPUCtrlSigs) :#= ex_ctrl.viewAsSupertype(new Bundle with HasFPUCtrlSigs) + req.rm := ex_rm + req.in1 := unbox(ex_rs(0), tag, minT) + req.in2 := unbox(ex_rs(1), tag, minT) + req.in3 := unbox(ex_rs(2), tag, minT) + req.typ := ex_reg_inst(21,20) + req.fmt := ex_reg_inst(26,25) + req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27)) + when (ex_cp_valid) { + req := io.cp_req.bits + when (io.cp_req.bits.swap23) { + req.in2 := io.cp_req.bits.in3 + req.in3 := io.cp_req.bits.in2 + } + } + req + } + + val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S)) + sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === S + sfma.io.in.bits := fuInput(Some(sfma.t)) + + val fpiu = Module(new FPToInt) + fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags)) + fpiu.io.in.bits := fuInput(None) + io.store_data := fpiu.io.out.bits.store + io.toint_data := fpiu.io.out.bits.toint + when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){ + io.cp_resp.bits.data := fpiu.io.out.bits.toint + io.cp_resp.valid := true.B + } + + val ifpu = Module(new IntToFP(2)) + ifpu.io.in.valid := req_valid && ex_ctrl.fromint + ifpu.io.in.bits := fpiu.io.in.bits + ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data) + + val fpmu = Module(new FPToFP(2)) + fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe + fpmu.io.in.bits := fpiu.io.in.bits + fpmu.io.lt := fpiu.io.out.bits.lt + + val divSqrt_wen = WireDefault(false.B) + val divSqrt_inFlight = WireDefault(false.B) + val divSqrt_waddr = Reg(UInt(5.W)) + val divSqrt_typeTag = 
Wire(UInt(log2Up(floatTypes.size).W)) + val divSqrt_wdata = Wire(UInt((fLen+1).W)) + val divSqrt_flags = Wire(UInt(FPConstants.FLAGS_SZ.W)) + divSqrt_typeTag := DontCare + divSqrt_wdata := DontCare + divSqrt_flags := DontCare + // writeback arbitration + case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) + val pipes = List( + Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), + Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === S, sfma.io.out.bits)) ++ + (fLen > 32).option({ + val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D)) + dfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === D + dfma.io.in.bits := fuInput(Some(dfma.t)) + Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === D, dfma.io.out.bits) + }) ++ + (minFLen == 16).option({ + val hfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.H)) + hfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === H + hfma.io.in.bits := fuInput(Some(hfma.t)) + Pipe(hfma, hfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === H, hfma.io.out.bits) + }) + def latencyMask(c: FPUCtrlSigs, offset: Int) = { + require(pipes.forall(_.lat >= offset)) + pipes.map(p => Mux(p.cond(c), (1 << p.lat-offset).U, 0.U)).reduce(_|_) + } + def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), p._2.U, 0.U)).reduce(_|_) + val maxLatency = pipes.map(_.lat).max + val memLatencyMask = latencyMask(mem_ctrl, 2) + + class WBInfo extends Bundle { + val rd = UInt(5.W) + val typeTag = UInt(log2Up(floatTypes.size).W) + val cp = Bool() + val pipeid = UInt(log2Ceil(pipes.size).W) + } + + val wen = RegInit(0.U((maxLatency-1).W)) + val wbInfo = Reg(Vec(maxLatency-1, new WBInfo)) + val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) + val write_port_busy = RegEnable(mem_wen && (memLatencyMask & 
latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) + ccover(mem_reg_valid && write_port_busy, "WB_STRUCTURAL", "structural hazard on writeback") + + for (i <- 0 until maxLatency-2) { + when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) } + } + wen := wen >> 1 + when (mem_wen) { + when (!killm) { + wen := wen >> 1 | memLatencyMask + } + for (i <- 0 until maxLatency-1) { + when (!write_port_busy && memLatencyMask(i)) { + wbInfo(i).cp := mem_cp_valid + wbInfo(i).typeTag := mem_ctrl.typeTagOut + wbInfo(i).pipeid := pipeid(mem_ctrl) + wbInfo(i).rd := mem_reg_inst(11,7) + } + } + } + + val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd) + val wtypeTag = Mux(divSqrt_wen, divSqrt_typeTag, wbInfo(0).typeTag) + val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wtypeTag) + val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid) + when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) { + assert(consistent(wdata)) + regfile(waddr) := wdata + if (enableCommitLog) { + printf("f%d p%d 0x%x\n", waddr, waddr + 32.U, ieee(wdata)) + } + frfWriteBundle(1).wrdst := waddr + frfWriteBundle(1).wrenf := true.B + frfWriteBundle(1).wrdata := ieee(wdata) + } + if (useDebugROB) { + DebugROB.pushWb(clock, reset, io.hartid, (!wbInfo(0).cp && wen(0)) || divSqrt_wen, waddr + 32.U, ieee(wdata)) + } + + when (wbInfo(0).cp && wen(0)) { + io.cp_resp.bits.data := wdata + io.cp_resp.valid := true.B + } + io.cp_req.ready := !ex_reg_valid + + val wb_toint_valid = wb_reg_valid && wb_ctrl.toint + val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) + io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) + io.fcsr_flags.bits := + Mux(wb_toint_valid, wb_toint_exc, 0.U) | + Mux(divSqrt_wen, divSqrt_flags, 0.U) | + Mux(wen(0), wexc, 0.U) + + val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR + io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || 
wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight) + io.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight + io.dec <> fp_decoder.io.sigs + def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(false.B)(_||_) + io.sboard_set := wb_reg_valid && !wb_cp_valid && RegNext(useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) + io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === x._2.U))) + io.sboard_clra := waddr + ccover(io.sboard_clr && load_wb, "DUAL_WRITEBACK", "load and FMA writeback on same cycle") + // we don't currently support round-max-magnitude (rm=4) + io.illegal_rm := io.inst(14,12).isOneOf(5.U, 6.U) || io.inst(14,12) === 7.U && io.fcsr_rm >= 5.U + + if (cfg.divSqrt) { + val divSqrt_inValid = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight + val divSqrt_killed = RegNext(divSqrt_inValid && killm, true.B) + when (divSqrt_inValid) { + divSqrt_waddr := mem_reg_inst(11,7) + } + + ccover(divSqrt_inFlight && divSqrt_killed, "DIV_KILLED", "divide killed after issued to divider") + ccover(divSqrt_inFlight && mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt), "DIV_BUSY", "divider structural hazard") + ccover(mem_reg_valid && divSqrt_write_port_busy, "DIV_WB_STRUCTURAL", "structural hazard on division writeback") + + for (t <- floatTypes) { + val tag = mem_ctrl.typeTagOut + val divSqrt = withReset(divSqrt_killed) { Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0)) } + divSqrt.io.inValid := divSqrt_inValid && tag === typeTag(t).U + divSqrt.io.sqrtOp := mem_ctrl.sqrt + divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t) + divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t) + divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm + divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding + + when (!divSqrt.io.inReady) { divSqrt_inFlight := true.B } // only 1 in 
flight + + when (divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) { + divSqrt_wen := !divSqrt_killed + divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t) + divSqrt_flags := divSqrt.io.exceptionFlags + divSqrt_typeTag := typeTag(t).U + } + } + + when (divSqrt_killed) { divSqrt_inFlight := false.B } + } else { + when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true.B } + } + + // gate the clock + clock_en_reg := !useClockGating.B || + io.keep_clock_enabled || // chicken bit + io.valid || // ID stage + req_valid || // EX stage + mem_reg_valid || mem_cp_valid || // MEM stage + wb_reg_valid || wb_cp_valid || // WB stage + wen.orR || divSqrt_inFlight || // post-WB stage + io.dmem_resp_val // load writeback + + } // leaving gated-clock domain + val fpuImpl = withClock (gated_clock) { new FPUImpl } + + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + property.cover(cond, s"FPU_$label", "Core;;" + desc) +} diff --git a/rocketv/src/fpu/FPToFP.scala b/rocketv/src/fpu/FPToFP.scala new file mode 100644 index 000000000..3df4558ce --- /dev/null +++ b/rocketv/src/fpu/FPToFP.scala @@ -0,0 +1,68 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package freechips.rocketchip.tile + +import chisel3._ +import chisel3.util.{Cat, Pipe, Valid} +import freechips.rocketchip.tile.{FPInput, FPResult, FPUModule} + +class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { + val io = IO(new Bundle { + val in = Flipped(Valid(new FPInput)) + val out = Valid(new FPResult) + val lt = Input(Bool()) // from FPToInt + }) + + val in = Pipe(io.in) + + val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2)) + val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen-1, 0)) + + val fsgnjMux = Wire(new FPResult) + fsgnjMux.exc := 0.U + fsgnjMux.data := fsgnj + + when (in.bits.wflags) { // fmin/fmax + val isnan1 = maxType.isNaN(in.bits.in1) + val isnan2 = maxType.isNaN(in.bits.in2) + val isInvalid = maxType.isSNaN(in.bits.in1) || maxType.isSNaN(in.bits.in2) + val isNaNOut = isnan1 && isnan2 + val isLHS = isnan2 || in.bits.rm(0) =/= io.lt && !isnan1 + fsgnjMux.exc := isInvalid << 4 + fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2)) + } + + val inTag = in.bits.typeTagIn + val outTag = in.bits.typeTagOut + val mux = WireDefault(fsgnjMux) + for (t <- floatTypes.init) { + when (outTag === typeTag(t).U) { + mux.data := Cat(fsgnjMux.data >> t.recodedWidth, maxType.unsafeConvert(fsgnjMux.data, t)) + } + } + + when (in.bits.wflags && !in.bits.ren2) { // fcvt + if (floatTypes.size > 1) { + // widening conversions simply canonicalize NaN operands + val widened = Mux(maxType.isNaN(in.bits.in1), maxType.qNaN, in.bits.in1) + fsgnjMux.data := widened + fsgnjMux.exc := maxType.isSNaN(in.bits.in1) << 4 + + // narrowing conversions require rounding (for RVQ, this could be + // optimized to use a single variable-position rounding unit, rather + // than two fixed-position ones) + for (outType <- floatTypes.init) when (outTag === typeTag(outType).U && ((typeTag(outType) == 0).B || outTag < inTag)) { + val narrower = Module(new 
hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig)) + narrower.io.in := in.bits.in1 + narrower.io.roundingMode := in.bits.rm + narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding + val narrowed = sanitizeNaN(narrower.io.out, outType) + mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed) + mux.exc := narrower.io.exceptionFlags + } + } + } + + io.out <> Pipe(in.valid, mux, latency-1) +} diff --git a/rocketv/src/fpu/FPToInt.scala b/rocketv/src/fpu/FPToInt.scala new file mode 100644 index 000000000..d1607e463 --- /dev/null +++ b/rocketv/src/fpu/FPToInt.scala @@ -0,0 +1,82 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +package freechips.rocketchip.tile + +import chisel3._ +import chisel3.util.{Cat, Fill, RegEnable, Valid, log2Ceil} +import freechips.rocketchip.tile.{FPConstants, FPInput, FPUModule, FType} + +class FPToInt(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { + class Output extends Bundle { + val in = new FPInput + val lt = Bool() + val store = Bits(fLen.W) + val toint = Bits(xLen.W) + val exc = Bits(FPConstants.FLAGS_SZ.W) + } + val io = IO(new Bundle { + val in = Flipped(Valid(new FPInput)) + val out = Valid(new Output) + }) + + val in = RegEnable(io.in.bits, io.in.valid) + val valid = RegNext(io.in.valid) + + val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth)) + dcmp.io.a := in.in1 + dcmp.io.b := in.in2 + dcmp.io.signaling := !in.rm(1) + + val tag = in.typeTagOut + val store = (floatTypes.map(t => if (t == FType.H) Fill(maxType.ieeeWidth / minXLen, ieee(in.in1)(15, 0).sextTo(minXLen)) + else Fill(maxType.ieeeWidth / t.ieeeWidth, ieee(in.in1)(t.ieeeWidth - 1, 0))): Seq[UInt])(tag) + val toint = WireDefault(store) + val intType = WireDefault(in.fmt(0)) + io.out.bits.store := store + io.out.bits.toint := ((0 until nIntTypes).map(i => toint((minXLen << i) - 1, 0).sextTo(xLen)): Seq[UInt])(intType) + io.out.bits.exc := 
0.U + + when (in.rm(0)) { + val classify_out = (floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))): Seq[UInt])(tag) + toint := classify_out | (store >> minXLen << minXLen) + intType := false.B + } + + when (in.wflags) { // feq/flt/fle, fcvt + toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR | (store >> minXLen << minXLen) + io.out.bits.exc := dcmp.io.exceptionFlags + intType := false.B + + when (!in.ren2) { // fcvt + val cvtType = in.typ.extract(log2Ceil(nIntTypes), 1) + intType := cvtType + val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, xLen)) + conv.io.in := in.in1 + conv.io.roundingMode := in.rm + conv.io.signedOut := ~in.typ(0) + toint := conv.io.out + io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, 0.U(3.W), conv.io.intExceptionFlags(0)) + + for (i <- 0 until nIntTypes-1) { + val w = minXLen << i + when (cvtType === i.U) { + val narrow = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, w)) + narrow.io.in := in.in1 + narrow.io.roundingMode := in.rm + narrow.io.signedOut := ~in.typ(0) + + val excSign = in.in1(maxExpWidth + maxSigWidth) && !maxType.isNaN(in.in1) + val excOut = Cat(conv.io.signedOut === excSign, Fill(w-1, !excSign)) + val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1) + when (invalid) { toint := Cat(conv.io.out >> w, excOut) } + io.out.bits.exc := Cat(invalid, 0.U(3.W), !invalid && conv.io.intExceptionFlags(0)) + } + } + } + } + + io.out.valid := valid + io.out.bits.lt := dcmp.io.lt || (dcmp.io.a.asSInt < 0.S && dcmp.io.b.asSInt >= 0.S) + io.out.bits.in := in +} diff --git a/rocketv/src/fpu/FPUFMAPipe.scala b/rocketv/src/fpu/FPUFMAPipe.scala new file mode 100644 index 000000000..eefccae3f --- /dev/null +++ b/rocketv/src/fpu/FPUFMAPipe.scala @@ -0,0 +1,45 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package freechips.rocketchip.tile + +import chisel3.util.{Pipe, Valid} +import chisel3.{Bundle, Flipped, Module, Reg, RegNext, Wire, when} +import freechips.rocketchip.tile.{FPInput, FPResult, FPUModule, FType, MulAddRecFNPipe} + +class FPUFMAPipe(val latency: Int, val t: FType) + (implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { + require(latency>0) + + val io = IO(new Bundle { + val in = Flipped(Valid(new FPInput)) + val out = Valid(new FPResult) + }) + + val valid = RegNext(io.in.valid) + val in = Reg(new FPInput) + when (io.in.valid) { + val one = 1.U << (t.sig + t.exp - 1) + val zero = (io.in.bits.in1 ^ io.in.bits.in2) & (1.U << (t.sig + t.exp)) + val cmd_fma = io.in.bits.ren3 + val cmd_addsub = io.in.bits.swap23 + in := io.in.bits + when (cmd_addsub) { in.in2 := one } + when (!(cmd_fma || cmd_addsub)) { in.in3 := zero } + } + + val fma = Module(new MulAddRecFNPipe((latency-1) min 2, t.exp, t.sig)) + fma.io.validin := valid + fma.io.op := in.fmaCmd + fma.io.roundingMode := in.rm + fma.io.detectTininess := hardfloat.consts.tininess_afterRounding + fma.io.a := in.in1 + fma.io.b := in.in2 + fma.io.c := in.in3 + + val res = Wire(new FPResult) + res.data := sanitizeNaN(fma.io.out, t) + res.exc := fma.io.exceptionFlags + + io.out := Pipe(fma.io.validout, res, (latency-3) max 0) +} diff --git a/rocketv/src/fpu/IntToFP.scala b/rocketv/src/fpu/IntToFP.scala new file mode 100644 index 000000000..12cb87405 --- /dev/null +++ b/rocketv/src/fpu/IntToFP.scala @@ -0,0 +1,53 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package freechips.rocketchip.tile + +import chisel3._ +import chisel3.util.{Cat, Pipe, Valid, log2Ceil} +import freechips.rocketchip.tile.{FPResult, FPUModule, IntToFPInput} + +class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { + val io = IO(new Bundle { + val in = Flipped(Valid(new IntToFPInput)) + val out = Valid(new FPResult) + }) + + val in = Pipe(io.in) + val tag = in.bits.typeTagIn + + val mux = Wire(new FPResult) + mux.exc := 0.U + mux.data := recode(in.bits.in1, tag) + + val intValue = { + val res = WireDefault(in.bits.in1.asSInt) + for (i <- 0 until nIntTypes-1) { + val smallInt = in.bits.in1((minXLen << i) - 1, 0) + when (in.bits.typ.extract(log2Ceil(nIntTypes), 1) === i.U) { + res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt) + } + } + res.asUInt + } + + when (in.bits.wflags) { // fcvt + // could be improved for RVD/RVQ with a single variable-position rounding + // unit, rather than N fixed-position ones + val i2fResults = for (t <- floatTypes) yield { + val i2f = Module(new hardfloat.INToRecFN(xLen, t.exp, t.sig)) + i2f.io.signedIn := ~in.bits.typ(0) + i2f.io.in := intValue + i2f.io.roundingMode := in.bits.rm + i2f.io.detectTininess := hardfloat.consts.tininess_afterRounding + (sanitizeNaN(i2f.io.out, t), i2f.io.exceptionFlags) + } + + val (data, exc) = i2fResults.unzip + val dataPadded = data.init.map(d => Cat(data.last >> d.getWidth, d)) :+ data.last + mux.data := dataPadded(tag) + mux.exc := exc(tag) + } + + io.out <> Pipe(in.valid, mux, latency-1) +} diff --git a/rocketv/src/fpu/MulAddRecFNPipe.scala b/rocketv/src/fpu/MulAddRecFNPipe.scala new file mode 100644 index 000000000..27ab41767 --- /dev/null +++ b/rocketv/src/fpu/MulAddRecFNPipe.scala @@ -0,0 +1,71 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package freechips.rocketchip.tile + + +import chisel3._ +import chisel3.util.Pipe + +class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) extends Module +{ + require(latency<=2) + + val io = IO(new Bundle { + val validin = Input(Bool()) + val op = Input(Bits(2.W)) + val a = Input(Bits((expWidth + sigWidth + 1).W)) + val b = Input(Bits((expWidth + sigWidth + 1).W)) + val c = Input(Bits((expWidth + sigWidth + 1).W)) + val roundingMode = Input(UInt(3.W)) + val detectTininess = Input(UInt(1.W)) + val out = Output(Bits((expWidth + sigWidth + 1).W)) + val exceptionFlags = Output(Bits(5.W)) + val validout = Output(Bool()) + }) + + //------------------------------------------------------------------------ + //------------------------------------------------------------------------ + + val mulAddRecFNToRaw_preMul = Module(new hardfloat.MulAddRecFNToRaw_preMul(expWidth, sigWidth)) + val mulAddRecFNToRaw_postMul = Module(new hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth)) + + mulAddRecFNToRaw_preMul.io.op := io.op + mulAddRecFNToRaw_preMul.io.a := io.a + mulAddRecFNToRaw_preMul.io.b := io.b + mulAddRecFNToRaw_preMul.io.c := io.c + + val mulAddResult = + (mulAddRecFNToRaw_preMul.io.mulAddA * + mulAddRecFNToRaw_preMul.io.mulAddB) +& + mulAddRecFNToRaw_preMul.io.mulAddC + + val valid_stage0 = Wire(Bool()) + val roundingMode_stage0 = Wire(UInt(3.W)) + val detectTininess_stage0 = Wire(UInt(1.W)) + + val postmul_regs = if(latency>0) 1 else 0 + mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(io.validin, mulAddRecFNToRaw_preMul.io.toPostMul, postmul_regs).bits + mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe(io.validin, mulAddResult, postmul_regs).bits + mulAddRecFNToRaw_postMul.io.roundingMode := Pipe(io.validin, io.roundingMode, postmul_regs).bits + roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits + detectTininess_stage0 := Pipe(io.validin, io.detectTininess, postmul_regs).bits + valid_stage0 := Pipe(io.validin, false.B, 
postmul_regs).valid + + //------------------------------------------------------------------------ + //------------------------------------------------------------------------ + + val roundRawFNToRecFN = Module(new hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0)) + + val round_regs = if(latency==2) 1 else 0 + roundRawFNToRecFN.io.invalidExc := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.invalidExc, round_regs).bits + roundRawFNToRecFN.io.in := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.rawOut, round_regs).bits + roundRawFNToRecFN.io.roundingMode := Pipe(valid_stage0, roundingMode_stage0, round_regs).bits + roundRawFNToRecFN.io.detectTininess := Pipe(valid_stage0, detectTininess_stage0, round_regs).bits + io.validout := Pipe(valid_stage0, false.B, round_regs).valid + + roundRawFNToRecFN.io.infiniteExc := false.B + + io.out := roundRawFNToRecFN.io.out + io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags +} \ No newline at end of file From e71e42e41aea0e6de1f67b5ba9820163c5a7af1c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 14:10:44 +0800 Subject: [PATCH 065/140] [rocketv] migrate FPU --- rocketv/src/Bundle.scala | 84 ++ rocketv/src/FPU.scala | 1257 ++++++++++++++----------- rocketv/src/fpu/FPToFP.scala | 91 +- rocketv/src/fpu/FPToInt.scala | 101 +- rocketv/src/fpu/FPUFMAPipe.scala | 80 +- rocketv/src/fpu/IntToFP.scala | 73 +- rocketv/src/fpu/MulAddRecFNPipe.scala | 101 +- 7 files changed, 1090 insertions(+), 697 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index ebcd8bc58..741b297ad 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -989,3 +989,87 @@ class ICacheErrors(hasCorrectable: Boolean, hasUncorrectable: Boolean, paddrBits class ICachePerfEvents extends Bundle { val acquire = Bool() } + +class FPInput(fLen: Int) extends Bundle { + val fpuControl = new FPUCtrlSigs + val rm = UInt(FPConstants.RM_SZ.W) + val fmaCmd = UInt(2.W) + val typ = UInt(2.W) + val fmt = UInt(2.W) + 
val in1 = UInt((fLen+1).W) + val in2 = UInt((fLen+1).W) + val in3 = UInt((fLen+1).W) +} + +// @todo DecodeBundle +class FPUCtrlSigs extends Bundle { + val ldst = Bool() + val wen = Bool() + val ren1 = Bool() + val ren2 = Bool() + val ren3 = Bool() + val swap12 = Bool() + val swap23 = Bool() + val typeTagIn = UInt(2.W) + val typeTagOut = UInt(2.W) + val fromint = Bool() + val toint = Bool() + val fastpipe = Bool() + val fma = Bool() + val div = Bool() + val sqrt = Bool() + val wflags = Bool() +} + +class FPResult(fLen: Int) extends Bundle { + val data = UInt((fLen+1).W) + val exc = UInt(FPConstants.FLAGS_SZ.W) +} + +class FPToIntOutput(fLen: Int, xLen: Int) extends Bundle { + val in = new FPInput(fLen) + val lt = Bool() + val store = UInt(fLen.W) + val toint = UInt(xLen.W) + val exc = UInt(FPConstants.FLAGS_SZ.W) +} + +class IntToFPInput(xLen: Int) extends Bundle { + val fpuControl = new FPUCtrlSigs + val rm = UInt(FPConstants.RM_SZ.W) + val typ = UInt(2.W) + val in1 = UInt(xLen.W) +} + + +class FPUCoreIO(hartIdLen: Int, xLen: Int, fLen: Int) extends Bundle { + val hartid = Input(UInt(hartIdLen.W)) + val time = Input(UInt(xLen.W)) + + val inst = Input(UInt(32.W)) + val fromint_data = Input(UInt(xLen.W)) + + val fcsr_rm = Input(UInt(FPConstants.RM_SZ.W)) + val fcsr_flags = Valid(UInt(FPConstants.FLAGS_SZ.W)) + + val store_data = Output(UInt(fLen.W)) + val toint_data = Output(UInt(xLen.W)) + + val dmem_resp_val = Input(Bool()) + val dmem_resp_type = Input(UInt(3.W)) + val dmem_resp_tag = Input(UInt(5.W)) + val dmem_resp_data = Input(UInt(fLen.W)) + + val valid = Input(Bool()) + val fcsr_rdy = Output(Bool()) + val nack_mem = Output(Bool()) + val illegal_rm = Output(Bool()) + val killx = Input(Bool()) + val killm = Input(Bool()) + val dec = Output(new FPUCtrlSigs()) + val sboard_set = Output(Bool()) + val sboard_clr = Output(Bool()) + val sboard_clra = Output(UInt(5.W)) + + val keep_clock_enabled = Input(Bool()) +} diff --git a/rocketv/src/FPU.scala 
b/rocketv/src/FPU.scala index 52e544f4c..c980908d4 100644 --- a/rocketv/src/FPU.scala +++ b/rocketv/src/FPU.scala @@ -1,237 +1,690 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. - -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{BaseModule, SerializableModule, SerializableModuleParameter} import chisel3.util._ -import chisel3.{DontCare, WireInit, withClock, withReset} -import chisel3.experimental.SourceInfo -import chisel3.experimental.dataview._ -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.rocket._ -import freechips.rocketchip.rocket.Instructions._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property - -case class FPUParams( - minFLen: Int = 32, - fLen: Int = 64, - divSqrt: Boolean = true, - sfmaLatency: Int = 3, - dfmaLatency: Int = 4 -) - -object FPConstants -{ - val RM_SZ = 3 - val FLAGS_SZ = 5 +import chisel3.util.circt.ClockGate + +object FPUParameter { + implicit def rwP: upickle.default.ReadWriter[FPUParameter] = upickle.default.macroRW[FPUParameter] } -trait HasFPUCtrlSigs { - val ldst = Bool() - val wen = Bool() - val ren1 = Bool() - val ren2 = Bool() - val ren3 = Bool() - val swap12 = Bool() - val swap23 = Bool() - val typeTagIn = UInt(2.W) - val typeTagOut = UInt(2.W) - val fromint = Bool() - val toint = Bool() - val fastpipe = Bool() - val fma = Bool() - val div = Bool() - val sqrt = Bool() - val wflags = Bool() +case class FPUParameter( + useAsyncReset: Boolean, + useClockGating: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + 
hartIdLen: Int) + extends SerializableModuleParameter + +class FPUInterface(parameter: FPUParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val core = new FPUCoreIO(parameter.hartIdLen, parameter.xLen, parameter.fLen) + val cp_req = Flipped(Decoupled(new FPInput(parameter.fLen))) //cp doesn't pay attn to kill sigs + val cp_resp = Decoupled(new FPResult(parameter.fLen)) } -class FPUCtrlSigs extends Bundle with HasFPUCtrlSigs +// TODO: all hardfloat module can be replaced by DWBB? +@instantiable +class FPU(val parameter: FPUParameter) + extends FixedIORawModule(new FPUInterface(parameter)) + with SerializableModule[FPUParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val helper = new FPUHelper(parameter.minFLen, parameter.minFLen, parameter.xLen) + val typeTagWbOffset = helper.typeTagWbOffset + def recode(x: UInt, tag: UInt): UInt = helper.recode(x, tag) + def consistent(x: UInt): Bool = helper.consistent(x) + def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = helper.unbox(x, tag, exactType) + def box(x: UInt, tag: UInt) = helper.box(x, tag) + def typeTag(t: FType) = helper.typeTag(t) + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) + def maxType = helper.maxType + val fLen = parameter.fLen + val minFLen = parameter.minFLen + val floatTypes = helper.floatTypes + val S = helper.S + val D = helper.D + val H = helper.H + object cfg { + val sfmaLatency = parameter.sfmaLatency + val dfmaLatency = parameter.dfmaLatency + val divSqrt = parameter.divSqrt + } -class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { - val io = IO(new Bundle { - val inst = Input(Bits(32.W)) - val sigs = Output(new FPUCtrlSigs()) - }) + val useClockGating = parameter.useClockGating + val clock_en_reg = Reg(Bool()) + val clock_en = clock_en_reg || 
io.cp_req.valid + val gated_clock = + if (!useClockGating) io.clock + else ClockGate(io.clock, clock_en) - private val X2 = BitPat.dontCare(2) + // TODO: remove me. + val fp_decoder = Module(new FPUDecoder(parameter)) + fp_decoder.io.inst := io.core.inst + val id_ctrl = fp_decoder.io.sigs - val default = List(X,X,X,X,X,X,X,X2,X2,X,X,X,X,X,X,X) - val h: Array[(BitPat, List[BitPat])] = - Array(FLH -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), - FSH -> List(Y,N,N,Y,N,Y,X, I, H,N,Y,N,N,N,N,N), - FMV_H_X -> List(N,Y,N,N,N,X,X, H, I,Y,N,N,N,N,N,N), - FCVT_H_W -> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), - FCVT_H_WU-> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), - FCVT_H_L -> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), - FCVT_H_LU-> List(N,Y,N,N,N,X,X, H, H,Y,N,N,N,N,N,Y), - FMV_X_H -> List(N,N,Y,N,N,N,X, I, H,N,Y,N,N,N,N,N), - FCLASS_H -> List(N,N,Y,N,N,N,X, H, H,N,Y,N,N,N,N,N), - FCVT_W_H -> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), - FCVT_WU_H-> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), - FCVT_L_H -> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), - FCVT_LU_H-> List(N,N,Y,N,N,N,X, H,X2,N,Y,N,N,N,N,Y), - FCVT_S_H -> List(N,Y,Y,N,N,N,X, H, S,N,N,Y,N,N,N,Y), - FCVT_H_S -> List(N,Y,Y,N,N,N,X, S, H,N,N,Y,N,N,N,Y), - FEQ_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), - FLT_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), - FLE_H -> List(N,N,Y,Y,N,N,N, H, H,N,Y,N,N,N,N,Y), - FSGNJ_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), - FSGNJN_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), - FSGNJX_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,N), - FMIN_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,Y), - FMAX_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,Y,N,N,N,Y), - FADD_H -> List(N,Y,Y,Y,N,N,Y, H, H,N,N,N,Y,N,N,Y), - FSUB_H -> List(N,Y,Y,Y,N,N,Y, H, H,N,N,N,Y,N,N,Y), - FMUL_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,N,Y,N,N,Y), - FMADD_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), - FMSUB_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), - FNMADD_H -> List(N,Y,Y,Y,Y,N,N, H, H,N,N,N,Y,N,N,Y), - FNMSUB_H -> List(N,Y,Y,Y,Y,N,N, H, 
H,N,N,N,Y,N,N,Y), - FDIV_H -> List(N,Y,Y,Y,N,N,N, H, H,N,N,N,N,Y,N,Y), - FSQRT_H -> List(N,Y,Y,N,N,N,X, H, H,N,N,N,N,N,Y,Y)) - val f: Array[(BitPat, List[BitPat])] = - Array(FLW -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), - FSW -> List(Y,N,N,Y,N,Y,X, I, S,N,Y,N,N,N,N,N), - FMV_W_X -> List(N,Y,N,N,N,X,X, S, I,Y,N,N,N,N,N,N), - FCVT_S_W -> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), - FCVT_S_WU-> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), - FCVT_S_L -> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), - FCVT_S_LU-> List(N,Y,N,N,N,X,X, S, S,Y,N,N,N,N,N,Y), - FMV_X_W -> List(N,N,Y,N,N,N,X, I, S,N,Y,N,N,N,N,N), - FCLASS_S -> List(N,N,Y,N,N,N,X, S, S,N,Y,N,N,N,N,N), - FCVT_W_S -> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), - FCVT_WU_S-> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), - FCVT_L_S -> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), - FCVT_LU_S-> List(N,N,Y,N,N,N,X, S,X2,N,Y,N,N,N,N,Y), - FEQ_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), - FLT_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), - FLE_S -> List(N,N,Y,Y,N,N,N, S, S,N,Y,N,N,N,N,Y), - FSGNJ_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), - FSGNJN_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), - FSGNJX_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,N), - FMIN_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,Y), - FMAX_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,Y,N,N,N,Y), - FADD_S -> List(N,Y,Y,Y,N,N,Y, S, S,N,N,N,Y,N,N,Y), - FSUB_S -> List(N,Y,Y,Y,N,N,Y, S, S,N,N,N,Y,N,N,Y), - FMUL_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,N,Y,N,N,Y), - FMADD_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), - FMSUB_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), - FNMADD_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), - FNMSUB_S -> List(N,Y,Y,Y,Y,N,N, S, S,N,N,N,Y,N,N,Y), - FDIV_S -> List(N,Y,Y,Y,N,N,N, S, S,N,N,N,N,Y,N,Y), - FSQRT_S -> List(N,Y,Y,N,N,N,X, S, S,N,N,N,N,N,Y,Y)) - val d: Array[(BitPat, List[BitPat])] = - Array(FLD -> List(Y,Y,N,N,N,X,X,X2,X2,N,N,N,N,N,N,N), - FSD -> List(Y,N,N,Y,N,Y,X, I, D,N,Y,N,N,N,N,N), - FMV_D_X -> List(N,Y,N,N,N,X,X, D, I,Y,N,N,N,N,N,N), - FCVT_D_W -> 
List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), - FCVT_D_WU-> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), - FCVT_D_L -> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), - FCVT_D_LU-> List(N,Y,N,N,N,X,X, D, D,Y,N,N,N,N,N,Y), - FMV_X_D -> List(N,N,Y,N,N,N,X, I, D,N,Y,N,N,N,N,N), - FCLASS_D -> List(N,N,Y,N,N,N,X, D, D,N,Y,N,N,N,N,N), - FCVT_W_D -> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), - FCVT_WU_D-> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), - FCVT_L_D -> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), - FCVT_LU_D-> List(N,N,Y,N,N,N,X, D,X2,N,Y,N,N,N,N,Y), - FCVT_S_D -> List(N,Y,Y,N,N,N,X, D, S,N,N,Y,N,N,N,Y), - FCVT_D_S -> List(N,Y,Y,N,N,N,X, S, D,N,N,Y,N,N,N,Y), - FEQ_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), - FLT_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), - FLE_D -> List(N,N,Y,Y,N,N,N, D, D,N,Y,N,N,N,N,Y), - FSGNJ_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), - FSGNJN_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), - FSGNJX_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,N), - FMIN_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,Y), - FMAX_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,Y,N,N,N,Y), - FADD_D -> List(N,Y,Y,Y,N,N,Y, D, D,N,N,N,Y,N,N,Y), - FSUB_D -> List(N,Y,Y,Y,N,N,Y, D, D,N,N,N,Y,N,N,Y), - FMUL_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,N,Y,N,N,Y), - FMADD_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), - FMSUB_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), - FNMADD_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), - FNMSUB_D -> List(N,Y,Y,Y,Y,N,N, D, D,N,N,N,Y,N,N,Y), - FDIV_D -> List(N,Y,Y,Y,N,N,N, D, D,N,N,N,N,Y,N,Y), - FSQRT_D -> List(N,Y,Y,N,N,N,X, D, D,N,N,N,N,N,Y,Y)) - val fcvt_hd: Array[(BitPat, List[BitPat])] = - Array(FCVT_H_D -> List(N,Y,Y,N,N,N,X, D, H,N,N,Y,N,N,N,Y), - FCVT_D_H -> List(N,Y,Y,N,N,N,X, H, D,N,N,Y,N,N,N,Y)) + val ex_reg_valid = RegNext(io.core.valid, false.B) + val ex_reg_inst = RegEnable(io.core.inst, io.core.valid) + val ex_reg_ctrl = RegEnable(id_ctrl, io.core.valid) + val ex_ra = List.fill(3)(Reg(UInt())) - val insns = (minFLen, fLen) match { - case (32, 32) => f - case (16, 32) 
=> h ++ f - case (32, 64) => f ++ d - case (16, 64) => h ++ f ++ d ++ fcvt_hd + // load response + val load_wb = RegNext(io.core.dmem_resp_val) + val load_wb_typeTag = RegEnable(io.core.dmem_resp_type(1, 0) - typeTagWbOffset, io.core.dmem_resp_val) + val load_wb_data = RegEnable(io.core.dmem_resp_data, io.core.dmem_resp_val) + val load_wb_tag = RegEnable(io.core.dmem_resp_tag, io.core.dmem_resp_val) - case other => throw new Exception(s"minFLen = ${minFLen} & fLen = ${fLen} is an unsupported configuration") - } - val decoder = DecodeLogic(io.inst, default, insns) - val s = io.sigs - val sigs = Seq(s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, - s.swap23, s.typeTagIn, s.typeTagOut, s.fromint, s.toint, - s.fastpipe, s.fma, s.div, s.sqrt, s.wflags) - sigs zip decoder map {case(s,d) => s := d} -} + class FPUImpl { // entering gated-clock domain + + val req_valid = ex_reg_valid || io.cp_req.valid + val ex_cp_valid = io.cp_req.fire + val mem_cp_valid = RegNext(ex_cp_valid, false.B) + val wb_cp_valid = RegNext(mem_cp_valid, false.B) + val mem_reg_valid = RegInit(false.B) + val killm = (io.core.killm || io.core.nack_mem) && !mem_cp_valid + // Kill X-stage instruction if M-stage is killed. This prevents it from + // speculatively being sent to the div-sqrt unit, which can cause priority + // inversion for two back-to-back divides, the first of which is killed. 
+ val killx = io.core.killx || mem_reg_valid && killm + mem_reg_valid := ex_reg_valid && !killx || ex_cp_valid + val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) + val wb_reg_valid = RegNext(mem_reg_valid && (!killm || mem_cp_valid), false.B) + + val cp_ctrl = Wire(new FPUCtrlSigs) + cp_ctrl :<>= io.cp_req.bits.fpuControl + io.cp_resp.valid := false.B + io.cp_resp.bits.data := 0.U + io.cp_resp.bits.exc := DontCare + + val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, ex_reg_ctrl) + val mem_ctrl = RegEnable(ex_ctrl, req_valid) + val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) + + // regfile + val regfile = Mem(32, Bits((fLen + 1).W)) + when(load_wb) { + val wdata = recode(load_wb_data, load_wb_typeTag) + regfile(load_wb_tag) := wdata + assert(consistent(wdata)) + } + + val ex_rs = ex_ra.map(a => regfile(a)) + when(io.core.valid) { + when(id_ctrl.ren1) { + when(!id_ctrl.swap12) { ex_ra(0) := io.core.inst(19, 15) } + when(id_ctrl.swap12) { ex_ra(1) := io.core.inst(19, 15) } + } + when(id_ctrl.ren2) { + when(id_ctrl.swap12) { ex_ra(0) := io.core.inst(24, 20) } + when(id_ctrl.swap23) { ex_ra(2) := io.core.inst(24, 20) } + when(!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.core.inst(24, 20) } + } + when(id_ctrl.ren3) { ex_ra(2) := io.core.inst(31, 27) } + } + val ex_rm = Mux(ex_reg_inst(14, 12) === 7.U, io.core.fcsr_rm, ex_reg_inst(14, 12)) + + def fuInput(minT: Option[FType]): FPInput = { + val req = Wire(new FPInput(fLen)) + val tag = ex_ctrl.typeTagIn + req.fpuControl :#= ex_ctrl + req.rm := ex_rm + req.in1 := unbox(ex_rs(0), tag, minT) + req.in2 := unbox(ex_rs(1), tag, minT) + req.in3 := unbox(ex_rs(2), tag, minT) + req.typ := ex_reg_inst(21, 20) + req.fmt := ex_reg_inst(26, 25) + req.fmaCmd := ex_reg_inst(3, 2) | (!ex_ctrl.ren3 && ex_reg_inst(27)) + when(ex_cp_valid) { + req := io.cp_req.bits + when(io.cp_req.bits.fpuControl.swap23) { + req.in2 := io.cp_req.bits.in3 + req.in3 := io.cp_req.bits.in2 + } + } + req + } + + val sfma = Instantiate( + new FPUFMAPipe( 
+ FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.sfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.S + ) + ) + ) + sfma.io.clock := io.clock + sfma.io.reset := io.reset + sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === S + sfma.io.in.bits := fuInput(Some(FType.S /*sfma.t*/ )) + + val fpiu = Instantiate( + new FPToInt( + FPToIntParameter( + parameter.useAsyncReset, + parameter.xLen, + parameter.fLen, + parameter.minFLen + ) + ) + ) + fpiu.io.clock := io.clock + fpiu.io.reset := io.reset + fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags)) + fpiu.io.in.bits := fuInput(None) + io.core.store_data := fpiu.io.out.bits.store + io.core.toint_data := fpiu.io.out.bits.toint + when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint) { + io.cp_resp.bits.data := fpiu.io.out.bits.toint + io.cp_resp.valid := true.B + } -class FPUCoreIO(implicit p: Parameters) extends CoreBundle()(p) { - val hartid = Input(UInt(hartIdLen.W)) - val time = Input(UInt(xLen.W)) + val ifpu = Instantiate( + new IntToFP( + IntToFPParameter( + parameter.useAsyncReset, + 2, + parameter.fLen, + parameter.xLen, + parameter.minFLen + ) + ) + ) + ifpu.io.clock := io.clock + ifpu.io.reset := io.reset + ifpu.io.in.valid := req_valid && ex_ctrl.fromint + ifpu.io.in.bits := fpiu.io.in.bits + ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.core.fromint_data) + + val fpmu = Instantiate( + new FPToFP( + FPToFPParameter( + parameter.useAsyncReset, + 2, + parameter.xLen, + parameter.fLen, + parameter.minFLen + ) + ) + ) + fpmu.io.clock := io.clock + fpmu.io.reset := io.reset + fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe + fpmu.io.in.bits := fpiu.io.in.bits + fpmu.io.lt := fpiu.io.out.bits.lt + + val divSqrt_wen = WireDefault(false.B) + val divSqrt_inFlight = WireDefault(false.B) + val divSqrt_waddr = Reg(UInt(5.W)) + val divSqrt_typeTag = 
Wire(UInt(log2Ceil(floatTypes.size).W)) + val divSqrt_wdata = Wire(UInt((parameter.fLen + 1).W)) + val divSqrt_flags = Wire(UInt(FPConstants.FLAGS_SZ.W)) + divSqrt_typeTag := DontCare + divSqrt_wdata := DontCare + divSqrt_flags := DontCare + // writeback arbitration + case class Pipe[T <: BaseModule](p: Instance[T], lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) + val dfma = Option.when(fLen > 32)( + Instantiate( + new FPUFMAPipe( + FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.dfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.D + ) + ) + ) + ) + val hfma = Option.when(minFLen == 16)( + Instantiate( + new FPUFMAPipe( + FPUFMAPipeParameter( + parameter.useAsyncReset, + parameter.sfmaLatency, + parameter.xLen, + parameter.fLen, + parameter.minFLen, + FType.H + ) + ) + ) + ) + dfma.foreach { dfma => + dfma.io.clock := io.clock + dfma.io.reset := io.reset + dfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === D + dfma.io.in.bits := fuInput(Some(FType.D /*dfma.t*/ )) + } + hfma.foreach { hfma => + hfma.io.clock := io.clock + hfma.io.reset := io.reset + hfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === H + hfma.io.in.bits := fuInput(Some(FType.H /*hfma.t*/ )) + } + val pipes = List( + Pipe(fpmu, 2, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), + Pipe(ifpu, 2, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), + Pipe(sfma, cfg.sfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === S, sfma.io.out.bits) + ) ++ + dfma.map(dfma => + Pipe(dfma, cfg.dfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === D, dfma.io.out.bits) + ) ++ + hfma.map(hfma => Pipe(hfma, cfg.sfmaLatency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === H, hfma.io.out.bits)) + def latencyMask(c: FPUCtrlSigs, offset: Int) = { + require(pipes.forall(_.lat >= offset)) + pipes.map(p => Mux(p.cond(c), (1 << p.lat - offset).U, 0.U)).reduce(_ | _) + } + def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => 
Mux(p._1.cond(c), p._2.U, 0.U)).reduce(_ | _) + val maxLatency = pipes.map(_.lat).max + val memLatencyMask = latencyMask(mem_ctrl, 2) + + class WBInfo extends Bundle { + val rd = UInt(5.W) + val typeTag = UInt(log2Ceil(floatTypes.size).W) + val cp = Bool() + val pipeid = UInt(log2Ceil(pipes.size).W) + } - val inst = Input(Bits(32.W)) - val fromint_data = Input(Bits(xLen.W)) + val wen = RegInit(0.U((maxLatency - 1).W)) + val wbInfo = Reg(Vec(maxLatency - 1, new WBInfo)) + val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) + val write_port_busy = RegEnable( + mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, + req_valid + ) - val fcsr_rm = Input(Bits(FPConstants.RM_SZ.W)) - val fcsr_flags = Valid(Bits(FPConstants.FLAGS_SZ.W)) + for (i <- 0 until maxLatency - 2) { + when(wen(i + 1)) { wbInfo(i) := wbInfo(i + 1) } + } + wen := wen >> 1 + when(mem_wen) { + when(!killm) { + wen := wen >> 1 | memLatencyMask + } + for (i <- 0 until maxLatency - 1) { + when(!write_port_busy && memLatencyMask(i)) { + wbInfo(i).cp := mem_cp_valid + wbInfo(i).typeTag := mem_ctrl.typeTagOut + wbInfo(i).pipeid := pipeid(mem_ctrl) + wbInfo(i).rd := mem_reg_inst(11, 7) + } + } + } - val store_data = Output(Bits(fLen.W)) - val toint_data = Output(Bits(xLen.W)) + val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd) + val wtypeTag = Mux(divSqrt_wen, divSqrt_typeTag, wbInfo(0).typeTag) + val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, VecInit(pipes.map(_.res.data))(wbInfo(0).pipeid)), wtypeTag) + val wexc = VecInit(pipes.map(_.res.exc))(wbInfo(0).pipeid) + when((!wbInfo(0).cp && wen(0)) || divSqrt_wen) { + assert(consistent(wdata)) + regfile(waddr) := wdata + } - val dmem_resp_val = Input(Bool()) - val dmem_resp_type = Input(Bits(3.W)) - val dmem_resp_tag = Input(UInt(5.W)) - val dmem_resp_data = Input(Bits(fLen.W)) + when(wbInfo(0).cp && wen(0)) { + io.cp_resp.bits.data := wdata + io.cp_resp.valid := true.B + } + 
io.cp_req.ready := !ex_reg_valid + + val wb_toint_valid = wb_reg_valid && wb_ctrl.toint + val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) + io.core.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) + io.core.fcsr_flags.bits := + Mux(wb_toint_valid, wb_toint_exc, 0.U) | + Mux(divSqrt_wen, divSqrt_flags, 0.U) | + Mux(wen(0), wexc, 0.U) + + val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR + io.core.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight) + io.core.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight + io.core.dec <> fp_decoder.io.sigs + def useScoreboard(f: ((Pipe[_], Int)) => Bool) = + pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(false.B)(_ || _) + io.core.sboard_set := wb_reg_valid && !wb_cp_valid && RegNext( + useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt + ) + io.core.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === x._2.U))) + io.core.sboard_clra := waddr + + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + // we don't currently support round-max-magnitude (rm=4) + io.core.illegal_rm := isOneOf(io.core.inst(14, 12), Seq(5.U, 6.U)) || io.core.inst( + 14, + 12 + ) === 7.U && io.core.fcsr_rm >= 5.U + + if (cfg.divSqrt) { + val divSqrt_inValid = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight + val divSqrt_killed = RegNext(divSqrt_inValid && killm, true.B) + when(divSqrt_inValid) { + divSqrt_waddr := mem_reg_inst(11, 7) + } - val valid = Input(Bool()) - val fcsr_rdy = Output(Bool()) - val nack_mem = Output(Bool()) - val illegal_rm = Output(Bool()) - val killx = Input(Bool()) - val killm = Input(Bool()) - val dec = Output(new FPUCtrlSigs()) - val sboard_set = Output(Bool()) - val sboard_clr = Output(Bool()) - val sboard_clra = Output(UInt(5.W)) + for (t 
<- floatTypes) { + val tag = mem_ctrl.typeTagOut + val divSqrt = withReset(divSqrt_killed) { Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0)) } + divSqrt.io.inValid := divSqrt_inValid && tag === typeTag(t).U + divSqrt.io.sqrtOp := mem_ctrl.sqrt + divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t) + divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t) + divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm + divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding + + when(!divSqrt.io.inReady) { divSqrt_inFlight := true.B } // only 1 in flight + + when(divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) { + divSqrt_wen := !divSqrt_killed + divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t) + divSqrt_flags := divSqrt.io.exceptionFlags + divSqrt_typeTag := typeTag(t).U + } + } - val keep_clock_enabled = Input(Bool()) -} + when(divSqrt_killed) { divSqrt_inFlight := false.B } + } else { + when(id_ctrl.div || id_ctrl.sqrt) { io.core.illegal_rm := true.B } + } -class FPUIO(implicit p: Parameters) extends FPUCoreIO ()(p) { - val cp_req = Flipped(Decoupled(new FPInput())) //cp doesn't pay attn to kill sigs - val cp_resp = Decoupled(new FPResult()) + // gate the clock + clock_en_reg := !useClockGating.B || + io.core.keep_clock_enabled || // chicken bit + io.core.valid || // ID stage + req_valid || // EX stage + mem_reg_valid || mem_cp_valid || // MEM stage + wb_reg_valid || wb_cp_valid || // WB stage + wen.orR || divSqrt_inFlight || // post-WB stage + io.core.dmem_resp_val // load writeback + + } // leaving gated-clock domain + val fpuImpl = withClockAndReset(gated_clock, io.reset) { new FPUImpl } } -class FPResult(implicit p: Parameters) extends CoreBundle()(p) { - val data = Bits((fLen+1).W) - val exc = Bits(FPConstants.FLAGS_SZ.W) +class FPUDecoderInterface(parameter: FPUParameter) extends Bundle { + val inst = Input(UInt(32.W)) + val sigs = Output(new FPUCtrlSigs()) } -class IntToFPInput(implicit p: Parameters) extends 
CoreBundle()(p) with HasFPUCtrlSigs { - val rm = Bits(FPConstants.RM_SZ.W) - val typ = Bits(2.W) - val in1 = Bits(xLen.W) +// TODO: we should eliminate this module and move it to CoreDecoder with optional fields. +class FPUDecoder(val parameter: FPUParameter) + extends FixedIORawModule(new FPUDecoderInterface(parameter)) + with SerializableModule[FPUParameter] { + val X2 = BitPat.dontCare(2) + val X = BitPat.dontCare(1) + val N = BitPat.N() + val Y = BitPat.N() + val helper = new FPUHelper(parameter.minFLen, parameter.minFLen, parameter.xLen) + // TODO: wtf here. + def H = BitPat(helper.H) + def I = BitPat(helper.I) + def D = BitPat(helper.D) + def S = BitPat(helper.S) + + def FADD_D = BitPat("b0000001??????????????????1010011") + def FADD_H = BitPat("b0000010??????????????????1010011") + def FADD_S = BitPat("b0000000??????????????????1010011") + def FCLASS_D = BitPat("b111000100000?????001?????1010011") + def FCLASS_H = BitPat("b111001000000?????001?????1010011") + def FCLASS_S = BitPat("b111000000000?????001?????1010011") + def FCVT_D_H = BitPat("b010000100010?????????????1010011") + def FCVT_D_L = BitPat("b110100100010?????????????1010011") + def FCVT_D_LU = BitPat("b110100100011?????????????1010011") + def FCVT_D_S = BitPat("b010000100000?????????????1010011") + def FCVT_D_W = BitPat("b110100100000?????????????1010011") + def FCVT_D_WU = BitPat("b110100100001?????????????1010011") + def FCVT_H_D = BitPat("b010001000001?????????????1010011") + def FCVT_H_L = BitPat("b110101000010?????????????1010011") + def FCVT_H_LU = BitPat("b110101000011?????????????1010011") + def FCVT_H_S = BitPat("b010001000000?????????????1010011") + def FCVT_H_W = BitPat("b110101000000?????????????1010011") + def FCVT_H_WU = BitPat("b110101000001?????????????1010011") + def FCVT_L_D = BitPat("b110000100010?????????????1010011") + def FCVT_L_H = BitPat("b110001000010?????????????1010011") + def FCVT_L_S = BitPat("b110000000010?????????????1010011") + def FCVT_LU_D = 
BitPat("b110000100011?????????????1010011") + def FCVT_LU_H = BitPat("b110001000011?????????????1010011") + def FCVT_LU_S = BitPat("b110000000011?????????????1010011") + def FCVT_S_D = BitPat("b010000000001?????????????1010011") + def FCVT_S_H = BitPat("b010000000010?????????????1010011") + def FCVT_S_L = BitPat("b110100000010?????????????1010011") + def FCVT_S_LU = BitPat("b110100000011?????????????1010011") + def FCVT_S_W = BitPat("b110100000000?????????????1010011") + def FCVT_S_WU = BitPat("b110100000001?????????????1010011") + def FCVT_W_D = BitPat("b110000100000?????????????1010011") + def FCVT_W_H = BitPat("b110001000000?????????????1010011") + def FCVT_W_S = BitPat("b110000000000?????????????1010011") + def FCVT_WU_D = BitPat("b110000100001?????????????1010011") + def FCVT_WU_H = BitPat("b110001000001?????????????1010011") + def FCVT_WU_S = BitPat("b110000000001?????????????1010011") + def FDIV_D = BitPat("b0001101??????????????????1010011") + def FDIV_H = BitPat("b0001110??????????????????1010011") + def FDIV_S = BitPat("b0001100??????????????????1010011") + def FEQ_D = BitPat("b1010001??????????010?????1010011") + def FEQ_H = BitPat("b1010010??????????010?????1010011") + def FEQ_S = BitPat("b1010000??????????010?????1010011") + def FLD = BitPat("b?????????????????011?????0000111") + def FLE_D = BitPat("b1010001??????????000?????1010011") + def FLE_H = BitPat("b1010010??????????000?????1010011") + def FLE_S = BitPat("b1010000??????????000?????1010011") + def FLH = BitPat("b?????????????????001?????0000111") + def FLT_D = BitPat("b1010001??????????001?????1010011") + def FLT_H = BitPat("b1010010??????????001?????1010011") + def FLT_S = BitPat("b1010000??????????001?????1010011") + def FLW = BitPat("b?????????????????010?????0000111") + def FMADD_D = BitPat("b?????01??????????????????1000011") + def FMADD_H = BitPat("b?????10??????????????????1000011") + def FMADD_S = BitPat("b?????00??????????????????1000011") + def FMAX_D = 
BitPat("b0010101??????????001?????1010011") + def FMAX_H = BitPat("b0010110??????????001?????1010011") + def FMAX_S = BitPat("b0010100??????????001?????1010011") + def FMIN_D = BitPat("b0010101??????????000?????1010011") + def FMIN_H = BitPat("b0010110??????????000?????1010011") + def FMIN_S = BitPat("b0010100??????????000?????1010011") + def FMSUB_D = BitPat("b?????01??????????????????1000111") + def FMSUB_H = BitPat("b?????10??????????????????1000111") + def FMSUB_S = BitPat("b?????00??????????????????1000111") + def FMUL_D = BitPat("b0001001??????????????????1010011") + def FMUL_H = BitPat("b0001010??????????????????1010011") + def FMUL_S = BitPat("b0001000??????????????????1010011") + def FMV_D_X = BitPat("b111100100000?????000?????1010011") + def FMV_H_X = BitPat("b111101000000?????000?????1010011") + def FMV_W_X = BitPat("b111100000000?????000?????1010011") + def FMV_X_D = BitPat("b111000100000?????000?????1010011") + def FMV_X_H = BitPat("b111001000000?????000?????1010011") + def FMV_X_W = BitPat("b111000000000?????000?????1010011") + def FNMADD_D = BitPat("b?????01??????????????????1001111") + def FNMADD_H = BitPat("b?????10??????????????????1001111") + def FNMADD_S = BitPat("b?????00??????????????????1001111") + def FNMSUB_D = BitPat("b?????01??????????????????1001011") + def FNMSUB_H = BitPat("b?????10??????????????????1001011") + def FNMSUB_S = BitPat("b?????00??????????????????1001011") + def FSD = BitPat("b?????????????????011?????0100111") + def FSGNJ_D = BitPat("b0010001??????????000?????1010011") + def FSGNJ_H = BitPat("b0010010??????????000?????1010011") + def FSGNJ_S = BitPat("b0010000??????????000?????1010011") + def FSGNJN_D = BitPat("b0010001??????????001?????1010011") + def FSGNJN_H = BitPat("b0010010??????????001?????1010011") + def FSGNJN_S = BitPat("b0010000??????????001?????1010011") + def FSGNJX_D = BitPat("b0010001??????????010?????1010011") + def FSGNJX_H = BitPat("b0010010??????????010?????1010011") + def FSGNJX_S = 
BitPat("b0010000??????????010?????1010011") + def FSH = BitPat("b?????????????????001?????0100111") + def FSQRT_D = BitPat("b010110100000?????????????1010011") + def FSQRT_H = BitPat("b010111000000?????????????1010011") + def FSQRT_S = BitPat("b010110000000?????????????1010011") + def FSUB_D = BitPat("b0000101??????????????????1010011") + def FSUB_H = BitPat("b0000110??????????????????1010011") + def FSUB_S = BitPat("b0000100??????????????????1010011") + def FSW = BitPat("b?????????????????010?????0100111") + + val default = List(X, X, X, X, X, X, X, X2, X2, X, X, X, X, X, X, X) + val h: Array[(BitPat, List[BitPat])] = + Array( + FLH -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSH -> List(Y, N, N, Y, N, Y, X, I, H, N, Y, N, N, N, N, N), + FMV_H_X -> List(N, Y, N, N, N, X, X, H, I, Y, N, N, N, N, N, N), + FCVT_H_W -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_WU -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_L -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FCVT_H_LU -> List(N, Y, N, N, N, X, X, H, H, Y, N, N, N, N, N, Y), + FMV_X_H -> List(N, N, Y, N, N, N, X, I, H, N, Y, N, N, N, N, N), + FCLASS_H -> List(N, N, Y, N, N, N, X, H, H, N, Y, N, N, N, N, N), + FCVT_W_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_WU_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_L_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_LU_H -> List(N, N, Y, N, N, N, X, H, X2, N, Y, N, N, N, N, Y), + FCVT_S_H -> List(N, Y, Y, N, N, N, X, H, S, N, N, Y, N, N, N, Y), + FCVT_H_S -> List(N, Y, Y, N, N, N, X, S, H, N, N, Y, N, N, N, Y), + FEQ_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FLT_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FLE_H -> List(N, N, Y, Y, N, N, N, H, H, N, Y, N, N, N, N, Y), + FSGNJ_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FSGNJN_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FSGNJX_H -> 
List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, N), + FMIN_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, Y), + FMAX_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, Y, N, N, N, Y), + FADD_H -> List(N, Y, Y, Y, N, N, Y, H, H, N, N, N, Y, N, N, Y), + FSUB_H -> List(N, Y, Y, Y, N, N, Y, H, H, N, N, N, Y, N, N, Y), + FMUL_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, N, Y, N, N, Y), + FMADD_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FMSUB_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FNMADD_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FNMSUB_H -> List(N, Y, Y, Y, Y, N, N, H, H, N, N, N, Y, N, N, Y), + FDIV_H -> List(N, Y, Y, Y, N, N, N, H, H, N, N, N, N, Y, N, Y), + FSQRT_H -> List(N, Y, Y, N, N, N, X, H, H, N, N, N, N, N, Y, Y) + ) + val f: Array[(BitPat, List[BitPat])] = + Array( + FLW -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSW -> List(Y, N, N, Y, N, Y, X, I, S, N, Y, N, N, N, N, N), + FMV_W_X -> List(N, Y, N, N, N, X, X, S, I, Y, N, N, N, N, N, N), + FCVT_S_W -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_WU -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_L -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FCVT_S_LU -> List(N, Y, N, N, N, X, X, S, S, Y, N, N, N, N, N, Y), + FMV_X_W -> List(N, N, Y, N, N, N, X, I, S, N, Y, N, N, N, N, N), + FCLASS_S -> List(N, N, Y, N, N, N, X, S, S, N, Y, N, N, N, N, N), + FCVT_W_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_WU_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_L_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FCVT_LU_S -> List(N, N, Y, N, N, N, X, S, X2, N, Y, N, N, N, N, Y), + FEQ_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FLT_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FLE_S -> List(N, N, Y, Y, N, N, N, S, S, N, Y, N, N, N, N, Y), + FSGNJ_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FSGNJN_S -> 
List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FSGNJX_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, N), + FMIN_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, Y), + FMAX_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, Y, N, N, N, Y), + FADD_S -> List(N, Y, Y, Y, N, N, Y, S, S, N, N, N, Y, N, N, Y), + FSUB_S -> List(N, Y, Y, Y, N, N, Y, S, S, N, N, N, Y, N, N, Y), + FMUL_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, N, Y, N, N, Y), + FMADD_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FMSUB_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FNMADD_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FNMSUB_S -> List(N, Y, Y, Y, Y, N, N, S, S, N, N, N, Y, N, N, Y), + FDIV_S -> List(N, Y, Y, Y, N, N, N, S, S, N, N, N, N, Y, N, Y), + FSQRT_S -> List(N, Y, Y, N, N, N, X, S, S, N, N, N, N, N, Y, Y) + ) + val d: Array[(BitPat, List[BitPat])] = + Array( + FLD -> List(Y, Y, N, N, N, X, X, X2, X2, N, N, N, N, N, N, N), + FSD -> List(Y, N, N, Y, N, Y, X, I, D, N, Y, N, N, N, N, N), + FMV_D_X -> List(N, Y, N, N, N, X, X, D, I, Y, N, N, N, N, N, N), + FCVT_D_W -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_WU -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_L -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FCVT_D_LU -> List(N, Y, N, N, N, X, X, D, D, Y, N, N, N, N, N, Y), + FMV_X_D -> List(N, N, Y, N, N, N, X, I, D, N, Y, N, N, N, N, N), + FCLASS_D -> List(N, N, Y, N, N, N, X, D, D, N, Y, N, N, N, N, N), + FCVT_W_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_WU_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_L_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_LU_D -> List(N, N, Y, N, N, N, X, D, X2, N, Y, N, N, N, N, Y), + FCVT_S_D -> List(N, Y, Y, N, N, N, X, D, S, N, N, Y, N, N, N, Y), + FCVT_D_S -> List(N, Y, Y, N, N, N, X, S, D, N, N, Y, N, N, N, Y), + FEQ_D -> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FLT_D 
-> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FLE_D -> List(N, N, Y, Y, N, N, N, D, D, N, Y, N, N, N, N, Y), + FSGNJ_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FSGNJN_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FSGNJX_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, N), + FMIN_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, Y), + FMAX_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, Y, N, N, N, Y), + FADD_D -> List(N, Y, Y, Y, N, N, Y, D, D, N, N, N, Y, N, N, Y), + FSUB_D -> List(N, Y, Y, Y, N, N, Y, D, D, N, N, N, Y, N, N, Y), + FMUL_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, N, Y, N, N, Y), + FMADD_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FMSUB_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FNMADD_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FNMSUB_D -> List(N, Y, Y, Y, Y, N, N, D, D, N, N, N, Y, N, N, Y), + FDIV_D -> List(N, Y, Y, Y, N, N, N, D, D, N, N, N, N, Y, N, Y), + FSQRT_D -> List(N, Y, Y, N, N, N, X, D, D, N, N, N, N, N, Y, Y) + ) + val fcvt_hd: Array[(BitPat, List[BitPat])] = + Array( + FCVT_H_D -> List(N, Y, Y, N, N, N, X, D, H, N, N, Y, N, N, N, Y), + FCVT_D_H -> List(N, Y, Y, N, N, N, X, H, D, N, N, Y, N, N, N, Y) + ) + + val insns = (parameter.minFLen, parameter.fLen) match { + case (32, 32) => f + case (16, 32) => h ++ f + case (32, 64) => f ++ d + case (16, 64) => h ++ f ++ d ++ fcvt_hd + case other => + throw new Exception(s"minFLen = ${parameter.minFLen} & fLen = ${parameter.fLen} is an unsupported configuration") + } + val decoder = DecodeLogic(io.inst, default, insns) + val s = io.sigs + val sigs = Seq( + s.ldst, + s.wen, + s.ren1, + s.ren2, + s.ren3, + s.swap12, + s.swap23, + s.typeTagIn, + s.typeTagOut, + s.fromint, + s.toint, + s.fastpipe, + s.fma, + s.div, + s.sqrt, + s.wflags + ) + sigs.zip(decoder).foreach { case (s, d) => s := d } } -class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs { - val rm = 
Bits(FPConstants.RM_SZ.W) - val fmaCmd = Bits(2.W) - val typ = Bits(2.W) - val fmt = Bits(2.W) - val in1 = Bits((fLen+1).W) - val in2 = Bits((fLen+1).W) - val in3 = Bits((fLen+1).W) +object FType { + implicit def rwP: upickle.default.ReadWriter[FType] = upickle.default.macroRW[FType] + + val H = FType(5, 11) + val S = FType(8, 24) + val D = FType(11, 53) + val all = List(H, S, D) } case class FType(exp: Int, sig: Int) { @@ -240,7 +693,7 @@ case class FType(exp: Int, sig: Int) { def ieeeQNaN = ((BigInt(1) << (ieeeWidth - 1)) - (BigInt(1) << (sig - 2))).U(ieeeWidth.W) def qNaN = ((BigInt(7) << (exp + sig - 3)) + (BigInt(1) << (sig - 2))).U(recodedWidth.W) - def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR + def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2) def classify(x: UInt) = { @@ -255,16 +708,26 @@ case class FType(exp: Int, sig: Int) { val isZero = code === 0.U val isInf = isSpecial && !code(0) val isNaN = code.andR - val isSNaN = isNaN && !x(sig-2) - val isQNaN = isNaN && x(sig-2) - - Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign, - isSubnormal && !sign, isZero && !sign, isZero && sign, - isSubnormal && sign, isNormal && sign, isInf && sign) + val isSNaN = isNaN && !x(sig - 2) + val isQNaN = isNaN && x(sig - 2) + + Cat( + isQNaN, + isSNaN, + isInf && !sign, + isNormal && !sign, + isSubnormal && !sign, + isZero && !sign, + isZero && sign, + isSubnormal && sign, + isNormal && sign, + isInf && sign + ) } // convert between formats, ignoring rounding, range, NaN - def unsafeConvert(x: UInt, to: FType) = if (this == to) x else { + def unsafeConvert(x: UInt, to: FType) = if (this == to) x + else { val sign = x(sig + exp) val fractIn = x(sig - 2, 0) val expIn = x(sig + exp - 1, sig - 1) @@ -282,7 +745,7 @@ case class FType(exp: Int, sig: Int) { class IEEEBundle extends Bundle { val sign = Bool() val exp = UInt(expWidth.W) - val sig = UInt((ieeeWidth-expWidth-1).W) + val sig = UInt((ieeeWidth - 
expWidth - 1).W) } new IEEEBundle } @@ -290,24 +753,14 @@ case class FType(exp: Int, sig: Int) { def unpackIEEE(x: UInt) = x.asTypeOf(ieeeBundle) def recode(x: UInt) = hardfloat.recFNFromFN(exp, sig, x) - def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x) -} - -object FType { - val H = new FType(5, 11) - val S = new FType(8, 24) - val D = new FType(11, 53) - - val all = List(H, S, D) + def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x) } -trait HasFPUParameters { +// TODO: migrate into FPUParameter +class FPUHelper(minFLen: Int, fLen: Int, xLen: Int) { require(fLen == 0 || FType.all.exists(_.ieeeWidth == fLen)) - val minFLen: Int - val fLen: Int - def xLen: Int val minXLen = 32 - val nIntTypes = log2Ceil(xLen/minXLen) + 1 + val nIntTypes = log2Ceil(xLen / minXLen) + 1 def floatTypes = FType.all.filter(t => minFLen <= t.ieeeWidth && t.ieeeWidth <= fLen) def minType = floatTypes.head def maxType = floatTypes.last @@ -334,7 +787,8 @@ trait HasFPUParameters { y(yt.recodedWidth - 2), x(xt.sig - 2, yt.recodedWidth - 1), y(yt.recodedWidth - 1), - y(yt.recodedWidth - 3, 0)) + y(yt.recodedWidth - 3, 0) + ) Mux(xt.isNaN(x), swizzledNaN, x) } @@ -347,10 +801,7 @@ trait HasFPUParameters { Seq() } else { val prevT = prevType(t) - val unswizzled = Cat( - x(prevT.sig + prevT.exp - 1), - x(t.sig - 1), - x(prevT.sig + prevT.exp - 2, 0)) + val unswizzled = Cat(x(prevT.sig + prevT.exp - 1), x(t.sig - 1), x(prevT.sig + prevT.exp - 2, 0)) val prev = helper(unswizzled, prevT) val isbox = isBox(x, t) prev.map(p => (isbox && p._1, p._2)) @@ -358,9 +809,9 @@ trait HasFPUParameters { prev :+ (true.B, t.unsafeConvert(x, outType)) } - val (oks: Seq[Bool], floats) = helper(x, maxType).unzip + val (oks, floats) = helper(x, maxType).unzip if (exactType.isEmpty || floatTypes.size == 1) { - Mux(oks(tag), floats(tag), maxType.qNaN) + Mux(VecInit(oks)(tag), VecInit(floats)(tag), maxType.qNaN) } else { val t = exactType.get floats(typeTag(t)) | Mux(oks(typeTag(t)), 0.U, t.qNaN) @@ -369,12 
+820,10 @@ trait HasFPUParameters { // make sure that the redundant bits in the NaN-boxed encoding are consistent def consistent(x: UInt): Bool = { - def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B else { + def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B + else { val prevT = prevType(t) - val unswizzled = Cat( - x(prevT.sig + prevT.exp - 1), - x(t.sig - 1), - x(prevT.sig + prevT.exp - 2, 0)) + val unswizzled = Cat(x(prevT.sig + prevT.exp - 1), x(t.sig - 1), x(prevT.sig + prevT.exp - 2, 0)) val prevOK = !isBox(x, t) || helper(unswizzled, prevT) val curOK = !t.isNaN(x) || x(t.sig + t.exp - 4) === x(t.sig - 2, prevT.recodedWidth - 1).andR prevOK && curOK @@ -388,7 +837,7 @@ trait HasFPUParameters { x } else { val nt = floatTypes(typeTag(t) + 1) - val bigger = box(((BigInt(1) << nt.recodedWidth)-1).U, nt, x, t) + val bigger = box(((BigInt(1) << nt.recodedWidth) - 1).U, nt, x, t) bigger | ((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth)).U } } @@ -396,7 +845,7 @@ trait HasFPUParameters { // generate a NaN box from an FU result def box(x: UInt, tag: UInt): UInt = { val opts = floatTypes.map(t => box(x, t)) - opts(tag) + VecInit(opts)(tag) } // zap bits that hardfloat thinks are don't-cares, but we do care about @@ -404,7 +853,7 @@ trait HasFPUParameters { if (typeTag(t) == 0) { x } else { - val maskedNaN = x & ~((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4))).U(t.recodedWidth.W) + val maskedNaN = x & ~((BigInt(1) << (t.sig - 1)) | (BigInt(1) << (t.sig + t.exp - 4))).U(t.recodedWidth.W) Mux(t.isNaN(x), maskedNaN, x) } } @@ -422,7 +871,7 @@ trait HasFPUParameters { // fill MSBs of subword loads to emulate a wider load of a NaN-boxed value val boxes = floatTypes.map(t => ((BigInt(1) << maxType.ieeeWidth) - (BigInt(1) << t.ieeeWidth)).U) - helper(boxes(tag) | x, maxType) + helper(VecInit(boxes)(tag) | x, maxType) } // implement NaN unboxing and un-recoding for FS*/fmv.x.* @@ -432,321 +881,9 @@ trait 
HasFPUParameters { } else { val unrecoded = t.ieee(x) val prevT = prevType(t) - val prevRecoded = Cat( - x(prevT.recodedWidth-2), - x(t.sig-1), - x(prevT.recodedWidth-3, 0)) + val prevRecoded = Cat(x(prevT.recodedWidth - 2), x(t.sig - 1), x(prevT.recodedWidth - 3, 0)) val prevUnrecoded = ieee(prevRecoded, prevT) - Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth-1, 0))) - } - } -} - -abstract class FPUModule(implicit val p: Parameters) extends Module with HasCoreParameters with HasFPUParameters - -class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { - val io = IO(new FPUIO) - - val (useClockGating, useDebugROB) = coreParams match { - case r: RocketCoreParams => - val sz = if (r.debugROB.isDefined) r.debugROB.get.size else 1 - (r.clockGate, sz < 1) - case _ => (false, false) - } - val clock_en_reg = Reg(Bool()) - val clock_en = clock_en_reg || io.cp_req.valid - val gated_clock = - if (!useClockGating) clock - else ClockGate(clock, clock_en, "fpu_clock_gate") - - val fp_decoder = Module(new FPUDecoder) - fp_decoder.io.inst := io.inst - val id_ctrl = fp_decoder.io.sigs - - val ex_reg_valid = RegNext(io.valid, false.B) - val ex_reg_inst = RegEnable(io.inst, io.valid) - val ex_reg_ctrl = RegEnable(id_ctrl, io.valid) - val ex_ra = List.fill(3)(Reg(UInt())) - - // load response - val load_wb = RegNext(io.dmem_resp_val) - val load_wb_typeTag = RegEnable(io.dmem_resp_type(1,0) - typeTagWbOffset, io.dmem_resp_val) - val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val) - val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val) - - class FPUImpl { // entering gated-clock domain - - val req_valid = ex_reg_valid || io.cp_req.valid - val ex_cp_valid = io.cp_req.fire - val mem_cp_valid = RegNext(ex_cp_valid, false.B) - val wb_cp_valid = RegNext(mem_cp_valid, false.B) - val mem_reg_valid = RegInit(false.B) - val killm = (io.killm || io.nack_mem) && !mem_cp_valid - // Kill X-stage instruction if 
M-stage is killed. This prevents it from - // speculatively being sent to the div-sqrt unit, which can cause priority - // inversion for two back-to-back divides, the first of which is killed. - val killx = io.killx || mem_reg_valid && killm - mem_reg_valid := ex_reg_valid && !killx || ex_cp_valid - val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) - val wb_reg_valid = RegNext(mem_reg_valid && (!killm || mem_cp_valid), false.B) - - val cp_ctrl = Wire(new FPUCtrlSigs) - cp_ctrl :<>= io.cp_req.bits.viewAsSupertype(new FPUCtrlSigs) - io.cp_resp.valid := false.B - io.cp_resp.bits.data := 0.U - io.cp_resp.bits.exc := DontCare - - val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, ex_reg_ctrl) - val mem_ctrl = RegEnable(ex_ctrl, req_valid) - val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) - - // CoreMonitorBundle to monitor fp register file writes - val frfWriteBundle = Seq.fill(2)(WireInit(new CoreMonitorBundle(xLen, fLen), DontCare)) - frfWriteBundle.foreach { i => - i.clock := clock - i.reset := reset - i.hartid := io.hartid - i.timer := io.time(31,0) - i.valid := false.B - i.wrenx := false.B - i.wrenf := false.B - i.excpt := false.B - } - - // regfile - val regfile = Mem(32, Bits((fLen+1).W)) - when (load_wb) { - val wdata = recode(load_wb_data, load_wb_typeTag) - regfile(load_wb_tag) := wdata - assert(consistent(wdata)) - if (enableCommitLog) - printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32.U, ieee(wdata)) - if (useDebugROB) - DebugROB.pushWb(clock, reset, io.hartid, load_wb, load_wb_tag + 32.U, ieee(wdata)) - frfWriteBundle(0).wrdst := load_wb_tag - frfWriteBundle(0).wrenf := true.B - frfWriteBundle(0).wrdata := ieee(wdata) - } - - val ex_rs = ex_ra.map(a => regfile(a)) - when (io.valid) { - when (id_ctrl.ren1) { - when (!id_ctrl.swap12) { ex_ra(0) := io.inst(19,15) } - when (id_ctrl.swap12) { ex_ra(1) := io.inst(19,15) } - } - when (id_ctrl.ren2) { - when (id_ctrl.swap12) { ex_ra(0) := io.inst(24,20) } - when (id_ctrl.swap23) { ex_ra(2) := io.inst(24,20) } - 
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.inst(24,20) } - } - when (id_ctrl.ren3) { ex_ra(2) := io.inst(31,27) } - } - val ex_rm = Mux(ex_reg_inst(14,12) === 7.U, io.fcsr_rm, ex_reg_inst(14,12)) - - def fuInput(minT: Option[FType]): FPInput = { - val req = Wire(new FPInput) - val tag = ex_ctrl.typeTagIn - req.viewAsSupertype(new Bundle with HasFPUCtrlSigs) :#= ex_ctrl.viewAsSupertype(new Bundle with HasFPUCtrlSigs) - req.rm := ex_rm - req.in1 := unbox(ex_rs(0), tag, minT) - req.in2 := unbox(ex_rs(1), tag, minT) - req.in3 := unbox(ex_rs(2), tag, minT) - req.typ := ex_reg_inst(21,20) - req.fmt := ex_reg_inst(26,25) - req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27)) - when (ex_cp_valid) { - req := io.cp_req.bits - when (io.cp_req.bits.swap23) { - req.in2 := io.cp_req.bits.in3 - req.in3 := io.cp_req.bits.in2 - } - } - req - } - - val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S)) - sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === S - sfma.io.in.bits := fuInput(Some(sfma.t)) - - val fpiu = Module(new FPToInt) - fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags)) - fpiu.io.in.bits := fuInput(None) - io.store_data := fpiu.io.out.bits.store - io.toint_data := fpiu.io.out.bits.toint - when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){ - io.cp_resp.bits.data := fpiu.io.out.bits.toint - io.cp_resp.valid := true.B - } - - val ifpu = Module(new IntToFP(2)) - ifpu.io.in.valid := req_valid && ex_ctrl.fromint - ifpu.io.in.bits := fpiu.io.in.bits - ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data) - - val fpmu = Module(new FPToFP(2)) - fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe - fpmu.io.in.bits := fpiu.io.in.bits - fpmu.io.lt := fpiu.io.out.bits.lt - - val divSqrt_wen = WireDefault(false.B) - val divSqrt_inFlight = WireDefault(false.B) - val divSqrt_waddr = Reg(UInt(5.W)) - val divSqrt_typeTag = 
Wire(UInt(log2Up(floatTypes.size).W)) - val divSqrt_wdata = Wire(UInt((fLen+1).W)) - val divSqrt_flags = Wire(UInt(FPConstants.FLAGS_SZ.W)) - divSqrt_typeTag := DontCare - divSqrt_wdata := DontCare - divSqrt_flags := DontCare - // writeback arbitration - case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) - val pipes = List( - Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), - Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), - Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === S, sfma.io.out.bits)) ++ - (fLen > 32).option({ - val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D)) - dfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === D - dfma.io.in.bits := fuInput(Some(dfma.t)) - Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === D, dfma.io.out.bits) - }) ++ - (minFLen == 16).option({ - val hfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.H)) - hfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.typeTagOut === H - hfma.io.in.bits := fuInput(Some(hfma.t)) - Pipe(hfma, hfma.latency, (c: FPUCtrlSigs) => c.fma && c.typeTagOut === H, hfma.io.out.bits) - }) - def latencyMask(c: FPUCtrlSigs, offset: Int) = { - require(pipes.forall(_.lat >= offset)) - pipes.map(p => Mux(p.cond(c), (1 << p.lat-offset).U, 0.U)).reduce(_|_) - } - def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), p._2.U, 0.U)).reduce(_|_) - val maxLatency = pipes.map(_.lat).max - val memLatencyMask = latencyMask(mem_ctrl, 2) - - class WBInfo extends Bundle { - val rd = UInt(5.W) - val typeTag = UInt(log2Up(floatTypes.size).W) - val cp = Bool() - val pipeid = UInt(log2Ceil(pipes.size).W) - } - - val wen = RegInit(0.U((maxLatency-1).W)) - val wbInfo = Reg(Vec(maxLatency-1, new WBInfo)) - val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) - val write_port_busy = RegEnable(mem_wen && (memLatencyMask & 
latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) - ccover(mem_reg_valid && write_port_busy, "WB_STRUCTURAL", "structural hazard on writeback") - - for (i <- 0 until maxLatency-2) { - when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) } - } - wen := wen >> 1 - when (mem_wen) { - when (!killm) { - wen := wen >> 1 | memLatencyMask - } - for (i <- 0 until maxLatency-1) { - when (!write_port_busy && memLatencyMask(i)) { - wbInfo(i).cp := mem_cp_valid - wbInfo(i).typeTag := mem_ctrl.typeTagOut - wbInfo(i).pipeid := pipeid(mem_ctrl) - wbInfo(i).rd := mem_reg_inst(11,7) - } + Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth - 1, 0))) } } - - val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd) - val wtypeTag = Mux(divSqrt_wen, divSqrt_typeTag, wbInfo(0).typeTag) - val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wtypeTag) - val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid) - when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) { - assert(consistent(wdata)) - regfile(waddr) := wdata - if (enableCommitLog) { - printf("f%d p%d 0x%x\n", waddr, waddr + 32.U, ieee(wdata)) - } - frfWriteBundle(1).wrdst := waddr - frfWriteBundle(1).wrenf := true.B - frfWriteBundle(1).wrdata := ieee(wdata) - } - if (useDebugROB) { - DebugROB.pushWb(clock, reset, io.hartid, (!wbInfo(0).cp && wen(0)) || divSqrt_wen, waddr + 32.U, ieee(wdata)) - } - - when (wbInfo(0).cp && wen(0)) { - io.cp_resp.bits.data := wdata - io.cp_resp.valid := true.B - } - io.cp_req.ready := !ex_reg_valid - - val wb_toint_valid = wb_reg_valid && wb_ctrl.toint - val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) - io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) - io.fcsr_flags.bits := - Mux(wb_toint_valid, wb_toint_exc, 0.U) | - Mux(divSqrt_wen, divSqrt_flags, 0.U) | - Mux(wen(0), wexc, 0.U) - - val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR 
- io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight) - io.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight - io.dec <> fp_decoder.io.sigs - def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(false.B)(_||_) - io.sboard_set := wb_reg_valid && !wb_cp_valid && RegNext(useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) - io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === x._2.U))) - io.sboard_clra := waddr - ccover(io.sboard_clr && load_wb, "DUAL_WRITEBACK", "load and FMA writeback on same cycle") - // we don't currently support round-max-magnitude (rm=4) - io.illegal_rm := io.inst(14,12).isOneOf(5.U, 6.U) || io.inst(14,12) === 7.U && io.fcsr_rm >= 5.U - - if (cfg.divSqrt) { - val divSqrt_inValid = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight - val divSqrt_killed = RegNext(divSqrt_inValid && killm, true.B) - when (divSqrt_inValid) { - divSqrt_waddr := mem_reg_inst(11,7) - } - - ccover(divSqrt_inFlight && divSqrt_killed, "DIV_KILLED", "divide killed after issued to divider") - ccover(divSqrt_inFlight && mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt), "DIV_BUSY", "divider structural hazard") - ccover(mem_reg_valid && divSqrt_write_port_busy, "DIV_WB_STRUCTURAL", "structural hazard on division writeback") - - for (t <- floatTypes) { - val tag = mem_ctrl.typeTagOut - val divSqrt = withReset(divSqrt_killed) { Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0)) } - divSqrt.io.inValid := divSqrt_inValid && tag === typeTag(t).U - divSqrt.io.sqrtOp := mem_ctrl.sqrt - divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t) - divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t) - divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm - divSqrt.io.detectTininess := 
hardfloat.consts.tininess_afterRounding - - when (!divSqrt.io.inReady) { divSqrt_inFlight := true.B } // only 1 in flight - - when (divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) { - divSqrt_wen := !divSqrt_killed - divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t) - divSqrt_flags := divSqrt.io.exceptionFlags - divSqrt_typeTag := typeTag(t).U - } - } - - when (divSqrt_killed) { divSqrt_inFlight := false.B } - } else { - when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true.B } - } - - // gate the clock - clock_en_reg := !useClockGating.B || - io.keep_clock_enabled || // chicken bit - io.valid || // ID stage - req_valid || // EX stage - mem_reg_valid || mem_cp_valid || // MEM stage - wb_reg_valid || wb_cp_valid || // WB stage - wen.orR || divSqrt_inFlight || // post-WB stage - io.dmem_resp_val // load writeback - - } // leaving gated-clock domain - val fpuImpl = withClock (gated_clock) { new FPUImpl } - - def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"FPU_$label", "Core;;" + desc) } diff --git a/rocketv/src/fpu/FPToFP.scala b/rocketv/src/fpu/FPToFP.scala index 3df4558ce..1762877d2 100644 --- a/rocketv/src/fpu/FPToFP.scala +++ b/rocketv/src/fpu/FPToFP.scala @@ -1,29 +1,63 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.{Cat, Pipe, Valid} -import freechips.rocketchip.tile.{FPInput, FPResult, FPUModule} -class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { - val io = IO(new Bundle { - val in = Flipped(Valid(new FPInput)) - val out = Valid(new FPResult) - val lt = Input(Bool()) // from FPToInt - }) +object FPToFPParameter { + implicit def rwP: upickle.default.ReadWriter[FPToFPParameter] = upickle.default.macroRW[FPToFPParameter] +} + +case class FPToFPParameter( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int) + extends SerializableModuleParameter + +class FPToFPInterface(parameter: FPToFPParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPResult(parameter.fLen)) + val lt = Input(Bool()) // from FPToInt +} + +@instantiable +class FPToFP(val parameter: FPToFPParameter) + extends FixedIORawModule(new FPToFPInterface(parameter)) + with SerializableModule[FPToFPParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val fLen = parameter.fLen + val minFLen = parameter.minFLen + val xLen = parameter.xLen + val latency = parameter.latency + val helper = new FPUHelper(minFLen, fLen, xLen) + val maxType = helper.maxType + val floatTypes = helper.floatTypes + def typeTag(t: FType) = 
helper.typeTag(t) + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) val in = Pipe(io.in) val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2)) - val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen-1, 0)) + val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen - 1, 0)) - val fsgnjMux = Wire(new FPResult) + val fsgnjMux = Wire(new FPResult(parameter.fLen)) fsgnjMux.exc := 0.U fsgnjMux.data := fsgnj - when (in.bits.wflags) { // fmin/fmax + when(in.bits.fpuControl.wflags) { // fmin/fmax val isnan1 = maxType.isNaN(in.bits.in1) val isnan2 = maxType.isNaN(in.bits.in2) val isInvalid = maxType.isSNaN(in.bits.in1) || maxType.isSNaN(in.bits.in2) @@ -33,16 +67,16 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) wi fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2)) } - val inTag = in.bits.typeTagIn - val outTag = in.bits.typeTagOut + val inTag = in.bits.fpuControl.typeTagIn + val outTag = in.bits.fpuControl.typeTagOut val mux = WireDefault(fsgnjMux) for (t <- floatTypes.init) { - when (outTag === typeTag(t).U) { + when(outTag === typeTag(t).U) { mux.data := Cat(fsgnjMux.data >> t.recodedWidth, maxType.unsafeConvert(fsgnjMux.data, t)) } } - when (in.bits.wflags && !in.bits.ren2) { // fcvt + when(in.bits.fpuControl.wflags && !in.bits.fpuControl.ren2) { // fcvt if (floatTypes.size > 1) { // widening conversions simply canonicalize NaN operands val widened = Mux(maxType.isNaN(in.bits.in1), maxType.qNaN, in.bits.in1) @@ -52,17 +86,18 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) wi // narrowing conversions require rounding (for RVQ, this could be // optimized to use a single variable-position rounding unit, rather // than two fixed-position ones) - for (outType <- floatTypes.init) when (outTag === typeTag(outType).U && ((typeTag(outType) == 0).B || outTag < inTag)) { - val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, 
maxType.sig, outType.exp, outType.sig)) - narrower.io.in := in.bits.in1 - narrower.io.roundingMode := in.bits.rm - narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding - val narrowed = sanitizeNaN(narrower.io.out, outType) - mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed) - mux.exc := narrower.io.exceptionFlags - } + for (outType <- floatTypes.init) + when(outTag === typeTag(outType).U && ((typeTag(outType) == 0).B || outTag < inTag)) { + val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig)) + narrower.io.in := in.bits.in1 + narrower.io.roundingMode := in.bits.rm + narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding + val narrowed = sanitizeNaN(narrower.io.out, outType) + mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed) + mux.exc := narrower.io.exceptionFlags + } } } - io.out <> Pipe(in.valid, mux, latency-1) + io.out <> Pipe(in.valid, mux, latency - 1) } diff --git a/rocketv/src/fpu/FPToInt.scala b/rocketv/src/fpu/FPToInt.scala index d1607e463..9faa3f3ef 100644 --- a/rocketv/src/fpu/FPToInt.scala +++ b/rocketv/src/fpu/FPToInt.scala @@ -1,55 +1,94 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{Cat, Fill, RegEnable, Valid, log2Ceil} -import freechips.rocketchip.tile.{FPConstants, FPInput, FPUModule, FType} +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ -class FPToInt(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { - class Output extends Bundle { - val in = new FPInput - val lt = Bool() - val store = Bits(fLen.W) - val toint = Bits(xLen.W) - val exc = Bits(FPConstants.FLAGS_SZ.W) - } - val io = IO(new Bundle { - val in = Flipped(Valid(new FPInput)) - val out = Valid(new Output) - }) +object FPToIntParameter { + implicit def rwP: upickle.default.ReadWriter[FPToIntParameter] = upickle.default.macroRW[FPToIntParameter] +} + +case class FPToIntParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int) + extends SerializableModuleParameter +class FPToIntInterface(parameter: FPToIntParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPToIntOutput(parameter.fLen, parameter.xLen)) +} + +@instantiable +class FPToInt(val parameter: FPToIntParameter) + extends FixedIORawModule(new FPToIntInterface(parameter)) + with SerializableModule[FPToIntParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val minFLen: Int = parameter.minFLen + val fLen: Int = parameter.fLen + val xLen: Int = parameter.xLen + val helper = new 
FPUHelper(minFLen, fLen, xLen) + val maxExpWidth = helper.maxExpWidth + val maxSigWidth = helper.maxSigWidth + val floatTypes = helper.floatTypes + val maxType = helper.maxType + val minXLen = helper.minXLen + val nIntTypes = helper.nIntTypes + def ieee(x: UInt, t: FType = maxType) = helper.ieee(x, t) val in = RegEnable(io.in.bits, io.in.valid) val valid = RegNext(io.in.valid) + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth)) dcmp.io.a := in.in1 dcmp.io.b := in.in2 dcmp.io.signaling := !in.rm(1) - val tag = in.typeTagOut - val store = (floatTypes.map(t => if (t == FType.H) Fill(maxType.ieeeWidth / minXLen, ieee(in.in1)(15, 0).sextTo(minXLen)) - else Fill(maxType.ieeeWidth / t.ieeeWidth, ieee(in.in1)(t.ieeeWidth - 1, 0))): Seq[UInt])(tag) + val tag = in.fpuControl.typeTagOut + val store = VecInit( + floatTypes.map(t => + if (t == FType.H) Fill(maxType.ieeeWidth / minXLen, sextTo(ieee(in.in1)(15, 0), minXLen)) + else Fill(maxType.ieeeWidth / t.ieeeWidth, ieee(in.in1)(t.ieeeWidth - 1, 0)) + ) + )(tag) val toint = WireDefault(store) val intType = WireDefault(in.fmt(0)) io.out.bits.store := store - io.out.bits.toint := ((0 until nIntTypes).map(i => toint((minXLen << i) - 1, 0).sextTo(xLen)): Seq[UInt])(intType) + io.out.bits.toint := VecInit( + (0 until helper.nIntTypes).map(i => sextTo(toint((helper.minXLen << i) - 1, 0), xLen)): Seq[UInt] + )(intType) io.out.bits.exc := 0.U - when (in.rm(0)) { - val classify_out = (floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))): Seq[UInt])(tag) + when(in.rm(0)) { + val classify_out = VecInit(floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))))(tag) toint := classify_out | (store >> minXLen << minXLen) intType := false.B } - when (in.wflags) { // feq/flt/fle, fcvt + when(in.fpuControl.wflags) { // feq/flt/fle, fcvt toint := (~in.rm 
& Cat(dcmp.io.lt, dcmp.io.eq)).orR | (store >> minXLen << minXLen) io.out.bits.exc := dcmp.io.exceptionFlags intType := false.B - when (!in.ren2) { // fcvt - val cvtType = in.typ.extract(log2Ceil(nIntTypes), 1) + when(!in.fpuControl.ren2) { // fcvt + val cvtType = if (log2Ceil(nIntTypes) == 0) 0.U else in.typ(log2Ceil(nIntTypes), 1) intType := cvtType val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, xLen)) conv.io.in := in.in1 @@ -58,18 +97,18 @@ class FPToInt(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetime toint := conv.io.out io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, 0.U(3.W), conv.io.intExceptionFlags(0)) - for (i <- 0 until nIntTypes-1) { + for (i <- 0 until nIntTypes - 1) { val w = minXLen << i - when (cvtType === i.U) { + when(cvtType === i.U) { val narrow = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, w)) narrow.io.in := in.in1 narrow.io.roundingMode := in.rm narrow.io.signedOut := ~in.typ(0) val excSign = in.in1(maxExpWidth + maxSigWidth) && !maxType.isNaN(in.in1) - val excOut = Cat(conv.io.signedOut === excSign, Fill(w-1, !excSign)) + val excOut = Cat(conv.io.signedOut === excSign, Fill(w - 1, !excSign)) val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1) - when (invalid) { toint := Cat(conv.io.out >> w, excOut) } + when(invalid) { toint := Cat(conv.io.out >> w, excOut) } io.out.bits.exc := Cat(invalid, 0.U(3.W), !invalid && conv.io.intExceptionFlags(0)) } } diff --git a/rocketv/src/fpu/FPUFMAPipe.scala b/rocketv/src/fpu/FPUFMAPipe.scala index eefccae3f..761ffd5a4 100644 --- a/rocketv/src/fpu/FPUFMAPipe.scala +++ b/rocketv/src/fpu/FPUFMAPipe.scala @@ -1,34 +1,70 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.{Pipe, Valid} -import chisel3.{Bundle, Flipped, Module, Reg, RegNext, Wire, when} -import freechips.rocketchip.tile.{FPInput, FPResult, FPUModule, FType, MulAddRecFNPipe} -class FPUFMAPipe(val latency: Int, val t: FType) - (implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { - require(latency>0) +object FPUFMAPipeParameter { + implicit def rwP: upickle.default.ReadWriter[FPUFMAPipeParameter] = upickle.default.macroRW[FPUFMAPipeParameter] +} + +case class FPUFMAPipeParameter( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int, + t: FType) + extends SerializableModuleParameter { + require(latency > 0) +} + +class FPUFMAPipeInterface(parameter: FPUFMAPipeParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new FPInput(parameter.fLen))) + val out = Valid(new FPResult(parameter.fLen)) +} + +@instantiable +class FPUFMAPipe(val parameter: FPUFMAPipeParameter) + extends FixedIORawModule(new FPUFMAPipeInterface(parameter)) + with SerializableModule[FPUFMAPipeParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset - val io = IO(new Bundle { - val in = Flipped(Valid(new FPInput)) - val out = Valid(new FPResult) - }) + val fLen = parameter.fLen + val t = parameter.t + val minFLen: Int = parameter.minFLen + val xLen: Int = parameter.xLen + 
val latency: Int = parameter.latency + val helper = new FPUHelper(minFLen, fLen, xLen) + def sanitizeNaN(x: UInt, t: FType): UInt = helper.sanitizeNaN(x, t) val valid = RegNext(io.in.valid) - val in = Reg(new FPInput) - when (io.in.valid) { + val in = Reg(new FPInput(fLen)) + when(io.in.valid) { val one = 1.U << (t.sig + t.exp - 1) val zero = (io.in.bits.in1 ^ io.in.bits.in2) & (1.U << (t.sig + t.exp)) - val cmd_fma = io.in.bits.ren3 - val cmd_addsub = io.in.bits.swap23 + val cmd_fma = io.in.bits.fpuControl.ren3 + val cmd_addsub = io.in.bits.fpuControl.swap23 in := io.in.bits - when (cmd_addsub) { in.in2 := one } - when (!(cmd_fma || cmd_addsub)) { in.in3 := zero } + when(cmd_addsub) { in.in2 := one } + when(!(cmd_fma || cmd_addsub)) { in.in3 := zero } } - val fma = Module(new MulAddRecFNPipe((latency-1) min 2, t.exp, t.sig)) + val fma: Instance[MulAddRecFNPipe] = Instantiate( + new MulAddRecFNPipe(MulAddRecFNPipeParameter(parameter.useAsyncReset, (latency - 1).min(2), t.exp, t.sig)) + ) + fma.io.clock := io.clock + fma.io.reset := io.reset fma.io.validin := valid fma.io.op := in.fmaCmd fma.io.roundingMode := in.rm @@ -37,9 +73,9 @@ class FPUFMAPipe(val latency: Int, val t: FType) fma.io.b := in.in2 fma.io.c := in.in3 - val res = Wire(new FPResult) + val res = Wire(new FPResult(parameter.fLen)) res.data := sanitizeNaN(fma.io.out, t) res.exc := fma.io.exceptionFlags - io.out := Pipe(fma.io.validout, res, (latency-3) max 0) + io.out := Pipe(fma.io.validout, res, (latency - 3).max(0)) } diff --git a/rocketv/src/fpu/IntToFP.scala b/rocketv/src/fpu/IntToFP.scala index 12cb87405..e2be64c01 100644 --- a/rocketv/src/fpu/IntToFP.scala +++ b/rocketv/src/fpu/IntToFP.scala @@ -1,37 +1,74 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.{Cat, Pipe, Valid, log2Ceil} -import freechips.rocketchip.tile.{FPResult, FPUModule, IntToFPInput} -class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) with ShouldBeRetimed { - val io = IO(new Bundle { - val in = Flipped(Valid(new IntToFPInput)) - val out = Valid(new FPResult) - }) +object IntToFPParameter { + implicit def rwP: upickle.default.ReadWriter[IntToFPParameter] = upickle.default.macroRW[IntToFPParameter] +} + +case class IntToFPParameter( + useAsyncReset: Boolean, + latency: Int, + fLen: Int, + xLen: Int, + minFLen: Int) + extends SerializableModuleParameter { + val minXLen = 32 +} +class IntToFPInterface(parameter: IntToFPParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val in = Flipped(Valid(new IntToFPInput(parameter.xLen))) + val out = Valid(new FPResult(parameter.fLen)) +} + +@instantiable +class IntToFP(val parameter: IntToFPParameter) + extends FixedIORawModule(new IntToFPInterface(parameter)) + with SerializableModule[IntToFPParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + // retime + val latency: Int = parameter.latency + val fLen: Int = parameter.fLen + val minFLen: Int = parameter.minFLen + val minXLen: Int = parameter.minXLen + val xLen: Int = parameter.xLen + val helper = new FPUHelper(minFLen: Int, fLen: Int, xLen: Int) + def recode(x: UInt, tag: UInt) = 
helper.recode(x, tag) + val nIntTypes: Int = helper.nIntTypes + val floatTypes: Seq[FType] = helper.floatTypes + def sanitizeNaN(x: UInt, t: FType) = helper.sanitizeNaN(x, t) val in = Pipe(io.in) - val tag = in.bits.typeTagIn + val tag = in.bits.fpuControl.typeTagIn - val mux = Wire(new FPResult) + val mux = Wire(new FPResult(fLen)) mux.exc := 0.U mux.data := recode(in.bits.in1, tag) val intValue = { val res = WireDefault(in.bits.in1.asSInt) - for (i <- 0 until nIntTypes-1) { + for (i <- 0 until nIntTypes - 1) { val smallInt = in.bits.in1((minXLen << i) - 1, 0) - when (in.bits.typ.extract(log2Ceil(nIntTypes), 1) === i.U) { + when(in.bits.typ(log2Ceil(nIntTypes), 1) === i.U) { res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt) } } res.asUInt } - when (in.bits.wflags) { // fcvt + when(in.bits.fpuControl.wflags) { // fcvt // could be improved for RVD/RVQ with a single variable-position rounding // unit, rather than N fixed-position ones val i2fResults = for (t <- floatTypes) yield { @@ -45,9 +82,9 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) w val (data, exc) = i2fResults.unzip val dataPadded = data.init.map(d => Cat(data.last >> d.getWidth, d)) :+ data.last - mux.data := dataPadded(tag) - mux.exc := exc(tag) + mux.data := VecInit(dataPadded)(tag) + mux.exc := VecInit(exc)(tag) } - io.out <> Pipe(in.valid, mux, latency-1) + io.out <> Pipe(in.valid, mux, latency - 1) } diff --git a/rocketv/src/fpu/MulAddRecFNPipe.scala b/rocketv/src/fpu/MulAddRecFNPipe.scala index 27ab41767..48dc9e4b9 100644 --- a/rocketv/src/fpu/MulAddRecFNPipe.scala +++ b/rocketv/src/fpu/MulAddRecFNPipe.scala @@ -1,29 +1,54 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. 
- -package freechips.rocketchip.tile - +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.Pipe -class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) extends Module -{ - require(latency<=2) - - val io = IO(new Bundle { - val validin = Input(Bool()) - val op = Input(Bits(2.W)) - val a = Input(Bits((expWidth + sigWidth + 1).W)) - val b = Input(Bits((expWidth + sigWidth + 1).W)) - val c = Input(Bits((expWidth + sigWidth + 1).W)) - val roundingMode = Input(UInt(3.W)) - val detectTininess = Input(UInt(1.W)) - val out = Output(Bits((expWidth + sigWidth + 1).W)) - val exceptionFlags = Output(Bits(5.W)) - val validout = Output(Bool()) - }) - +object MulAddRecFNPipeParameter { + implicit def rwP: upickle.default.ReadWriter[MulAddRecFNPipeParameter] = upickle.default.macroRW[MulAddRecFNPipeParameter] +} + +case class MulAddRecFNPipeParameter( + useAsyncReset: Boolean, + latency: Int, + expWidth: Int, + sigWidth: Int) + extends SerializableModuleParameter { + require(latency <= 2) +} + +class MulAddRecFNPipeInterface(parameter: MulAddRecFNPipeParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val validin = Input(Bool()) + val op = Input(UInt(2.W)) + val a = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val b = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val c = Input(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val roundingMode = Input(UInt(3.W)) + val detectTininess = Input(UInt(1.W)) + val out = Output(UInt((parameter.expWidth + parameter.sigWidth + 1).W)) + val 
exceptionFlags = Output(UInt(5.W)) + val validout = Output(Bool()) +} + +@instantiable +class MulAddRecFNPipe(val parameter: MulAddRecFNPipeParameter) + extends FixedIORawModule(new MulAddRecFNPipeInterface(parameter)) + with SerializableModule[MulAddRecFNPipeParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val latency: Int = parameter.latency + val expWidth: Int = parameter.expWidth + val sigWidth: Int = parameter.sigWidth //------------------------------------------------------------------------ //------------------------------------------------------------------------ @@ -31,9 +56,9 @@ class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) extends Module val mulAddRecFNToRaw_postMul = Module(new hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth)) mulAddRecFNToRaw_preMul.io.op := io.op - mulAddRecFNToRaw_preMul.io.a := io.a - mulAddRecFNToRaw_preMul.io.b := io.b - mulAddRecFNToRaw_preMul.io.c := io.c + mulAddRecFNToRaw_preMul.io.a := io.a + mulAddRecFNToRaw_preMul.io.b := io.b + mulAddRecFNToRaw_preMul.io.c := io.c val mulAddResult = (mulAddRecFNToRaw_preMul.io.mulAddA * @@ -44,28 +69,28 @@ class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) extends Module val roundingMode_stage0 = Wire(UInt(3.W)) val detectTininess_stage0 = Wire(UInt(1.W)) - val postmul_regs = if(latency>0) 1 else 0 - mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(io.validin, mulAddRecFNToRaw_preMul.io.toPostMul, postmul_regs).bits + val postmul_regs = if (latency > 0) 1 else 0 + mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(io.validin, mulAddRecFNToRaw_preMul.io.toPostMul, postmul_regs).bits mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe(io.validin, mulAddResult, postmul_regs).bits mulAddRecFNToRaw_postMul.io.roundingMode := Pipe(io.validin, io.roundingMode, postmul_regs).bits - roundingMode_stage0 := Pipe(io.validin, io.roundingMode, 
postmul_regs).bits - detectTininess_stage0 := Pipe(io.validin, io.detectTininess, postmul_regs).bits - valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid + roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits + detectTininess_stage0 := Pipe(io.validin, io.detectTininess, postmul_regs).bits + valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid //------------------------------------------------------------------------ //------------------------------------------------------------------------ val roundRawFNToRecFN = Module(new hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0)) - val round_regs = if(latency==2) 1 else 0 - roundRawFNToRecFN.io.invalidExc := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.invalidExc, round_regs).bits - roundRawFNToRecFN.io.in := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.rawOut, round_regs).bits - roundRawFNToRecFN.io.roundingMode := Pipe(valid_stage0, roundingMode_stage0, round_regs).bits - roundRawFNToRecFN.io.detectTininess := Pipe(valid_stage0, detectTininess_stage0, round_regs).bits - io.validout := Pipe(valid_stage0, false.B, round_regs).valid + val round_regs = if (latency == 2) 1 else 0 + roundRawFNToRecFN.io.invalidExc := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.invalidExc, round_regs).bits + roundRawFNToRecFN.io.in := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.rawOut, round_regs).bits + roundRawFNToRecFN.io.roundingMode := Pipe(valid_stage0, roundingMode_stage0, round_regs).bits + roundRawFNToRecFN.io.detectTininess := Pipe(valid_stage0, detectTininess_stage0, round_regs).bits + io.validout := Pipe(valid_stage0, false.B, round_regs).valid roundRawFNToRecFN.io.infiniteExc := false.B - io.out := roundRawFNToRecFN.io.out + io.out := roundRawFNToRecFN.io.out io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags -} \ No newline at end of file +} From 10e14f1468fb99a26537d6f34beebdd89a780ab4 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 14:10:54 +0800 
Subject: [PATCH 066/140] [rocketv] add elaborator for FPU - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.FPU config --useAsyncReset true --useClockGating true --xLen 32 --fLen 32 --minFLen 32 --sfmaLatency 3 --dfmaLatency 4 --divSqrt true --hartIdLen 1 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.FPU design --parameter ./FPU.json --run-firtool --- elaborator/src/rocketv/FPToFP.scala | 36 ++++++++++++++++ elaborator/src/rocketv/FPToInt.scala | 34 +++++++++++++++ elaborator/src/rocketv/FPU.scala | 44 +++++++++++++++++++ elaborator/src/rocketv/FPUFMAPipe.scala | 45 ++++++++++++++++++++ elaborator/src/rocketv/IntToFP.scala | 36 ++++++++++++++++ elaborator/src/rocketv/MulAddRecFNPipe.scala | 30 +++++++++++++ 6 files changed, 225 insertions(+) create mode 100644 elaborator/src/rocketv/FPToFP.scala create mode 100644 elaborator/src/rocketv/FPToInt.scala create mode 100644 elaborator/src/rocketv/FPU.scala create mode 100644 elaborator/src/rocketv/FPUFMAPipe.scala create mode 100644 elaborator/src/rocketv/IntToFP.scala create mode 100644 elaborator/src/rocketv/MulAddRecFNPipe.scala diff --git a/elaborator/src/rocketv/FPToFP.scala b/elaborator/src/rocketv/FPToFP.scala new file mode 100644 index 000000000..c35094b93 --- /dev/null +++ b/elaborator/src/rocketv/FPToFP.scala @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPToFP, FPToFPParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPToFP extends Elaborator { + @main + case class FPToFPParameterMain( + useAsyncReset: Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int) { + def convert: FPToFPParameter = FPToFPParameter( + useAsyncReset, + latency, + xLen, + fLen, + minFLen + ) + } + + implicit def FPToFPParameterMainParser: 
ParserForClass[FPToFPParameterMain] = ParserForClass[FPToFPParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPToFPParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPToFP, FPToFPParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPToInt.scala b/elaborator/src/rocketv/FPToInt.scala new file mode 100644 index 000000000..2ac447b02 --- /dev/null +++ b/elaborator/src/rocketv/FPToInt.scala @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPToInt, FPToIntParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPToInt extends Elaborator { + @main + case class FPToIntParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "minFLen") minFLen: Int) { + def convert: FPToIntParameter = FPToIntParameter( + useAsyncReset, + xLen, + fLen, + minFLen + ) + } + + implicit def FPToIntParameterMainParser: ParserForClass[FPToIntParameterMain] = ParserForClass[FPToIntParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPToIntParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPToInt, FPToIntParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPU.scala b/elaborator/src/rocketv/FPU.scala new file mode 100644 index 000000000..6a077b1b8 --- /dev/null +++ b/elaborator/src/rocketv/FPU.scala @@ -0,0 +1,44 @@ 
+// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPU, FPUParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPU extends Elaborator { + @main + case class FPUParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "useClockGating") useClockGating: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "minFLen") minFLen: Int, + @arg(name = "sfmaLatency") sfmaLatency: Int, + @arg(name = "dfmaLatency") dfmaLatency: Int, + @arg(name = "divSqrt") divSqrt: Boolean, + @arg(name = "hartIdLen") hartIdLen: Int) { + def convert: FPUParameter = FPUParameter( + useAsyncReset, + useClockGating, + xLen, + fLen, + minFLen, + sfmaLatency, + dfmaLatency, + divSqrt, + hartIdLen + ) + } + + implicit def FPUParameterMainParser: ParserForClass[FPUParameterMain] = ParserForClass[FPUParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPUParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPU, FPUParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/FPUFMAPipe.scala b/elaborator/src/rocketv/FPUFMAPipe.scala new file mode 100644 index 000000000..5c2b15724 --- /dev/null +++ b/elaborator/src/rocketv/FPUFMAPipe.scala @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{FPUFMAPipe, FPUFMAPipeParameter, FType} +import org.chipsalliance.t1.elaborator.Elaborator + +object FPUFMAPipe extends Elaborator { + @main + case class FPUFMAPipeParameterMain( + useAsyncReset: 
Boolean, + latency: Int, + xLen: Int, + fLen: Int, + minFLen: Int, + t: String) { + + def convert: FPUFMAPipeParameter = FPUFMAPipeParameter( + useAsyncReset, + latency, + xLen, + fLen, + minFLen, + t match { + case s"e${exp}s${sig}" => FType(exp.toInt, sig.toInt) + case "h" => FType(5, 11) + case "s" => FType(8, 24) + case "d" => FType(11, 53) + } + ) + } + + implicit def FPUFMAPipeParameterMainParser: ParserForClass[FPUFMAPipeParameterMain] = + ParserForClass[FPUFMAPipeParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FPUFMAPipeParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[FPUFMAPipe, FPUFMAPipeParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/IntToFP.scala b/elaborator/src/rocketv/IntToFP.scala new file mode 100644 index 000000000..b195becb7 --- /dev/null +++ b/elaborator/src/rocketv/IntToFP.scala @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{IntToFP, IntToFPParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object IntToFP extends Elaborator { + @main + case class IntToFPParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "xLen") xLen: Int, + @arg(name = "minFLen") minFLen: Int) { + def convert: IntToFPParameter = IntToFPParameter( + useAsyncReset, + latency, + fLen, + xLen, + minFLen + ) + } + + implicit def IntToFPParameterMainParser: ParserForClass[IntToFPParameterMain] = ParserForClass[IntToFPParameterMain] + + @main + def config(@arg(name = "parameter") parameter: IntToFPParameterMain) = configImpl(parameter.convert) 
+ + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[IntToFP, IntToFPParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/elaborator/src/rocketv/MulAddRecFNPipe.scala b/elaborator/src/rocketv/MulAddRecFNPipe.scala new file mode 100644 index 000000000..741567250 --- /dev/null +++ b/elaborator/src/rocketv/MulAddRecFNPipe.scala @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{MulAddRecFNPipe, MulAddRecFNPipeParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object MulAddRecFNPipe extends Elaborator { + @main + case class MulAddRecFNPipeParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "latency") latency: Int, + @arg(name = "expWidth") expWidth: Int, + @arg(name = "sigWidth") sigWidth: Int) { + def convert: MulAddRecFNPipeParameter = MulAddRecFNPipeParameter(useAsyncReset, latency, expWidth, sigWidth) + } + + implicit def MulAddRecFNPipeParameterMainParser: ParserForClass[MulAddRecFNPipeParameterMain] = + ParserForClass[MulAddRecFNPipeParameterMain] + + @main + def config(@arg(name = "parameter") parameter: MulAddRecFNPipeParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[MulAddRecFNPipe, MulAddRecFNPipeParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From ae401041c2ea162b65dc78f7fd538ea44fe879ae Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 14:20:16 +0800 Subject: [PATCH 067/140] [rocketv] copy TLB into rocketv project --- rocketv/src/TLB.scala | 835 
++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 835 insertions(+) create mode 100644 rocketv/src/TLB.scala diff --git a/rocketv/src/TLB.scala b/rocketv/src/TLB.scala new file mode 100644 index 000000000..3fc9dec2a --- /dev/null +++ b/rocketv/src/TLB.scala @@ -0,0 +1,835 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util._ + +import org.chipsalliance.cde.config.{Field, Parameters} +import freechips.rocketchip.subsystem.CacheBlockBytes +import freechips.rocketchip.diplomacy.RegionType +import freechips.rocketchip.tile.{CoreBundle, CoreModule} +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property +import freechips.rocketchip.devices.debug.DebugModuleKey +import chisel3.experimental.SourceInfo + +case object PgLevels extends Field[Int](2) +case object ASIdBits extends Field[Int](0) +case object VMIdBits extends Field[Int](0) + +/** =SFENCE= + * rs1 rs2 + * {{{ + * 0 0 -> flush All + * 0 1 -> flush by ASID + * 1 1 -> flush by ADDR + * 1 0 -> flush by ADDR and ASID + * }}} + * {{{ + * If rs1=x0 and rs2=x0, the fence orders all reads and writes made to any level of the page tables, for all address spaces. + * If rs1=x0 and rs2!=x0, the fence orders all reads and writes made to any level of the page tables, but only for the address space identified by integer register rs2. Accesses to global mappings (see Section 4.3.1) are not ordered. + * If rs1!=x0 and rs2=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for all address spaces. + * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered. 
+ * }}} + */ +class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { + val rs1 = Bool() + val rs2 = Bool() + val addr = UInt(vaddrBits.W) + val asid = UInt((asIdBits.max(1)).W) // TODO zero-width + val hv = Bool() + val hg = Bool() +} + +class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) { + + /** request address from CPU. */ + val vaddr = UInt(vaddrBitsExtended.W) + + /** don't lookup TLB, bypass vaddr as paddr */ + val passthrough = Bool() + + /** granularity */ + val size = UInt(log2Ceil(lgMaxSize + 1).W) + + /** memory command. */ + val cmd = Bits(M_SZ.W) + val prv = UInt(PRV.SZ.W) + + /** virtualization mode */ + val v = Bool() + +} + +class TLBExceptions extends Bundle { + val ld = Bool() + val st = Bool() + val inst = Bool() +} + +class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { + // lookup responses + val miss = Bool() + + /** physical address */ + val paddr = UInt(paddrBits.W) + val gpa = UInt(vaddrBitsExtended.W) + val gpa_is_pte = Bool() + + /** page fault exception */ + val pf = new TLBExceptions + + /** guest page fault exception */ + val gf = new TLBExceptions + + /** access exception */ + val ae = new TLBExceptions + + /** misaligned access exception */ + val ma = new TLBExceptions + + /** if this address is cacheable */ + val cacheable = Bool() + + /** if caches must allocate this address */ + val must_alloc = Bool() + + /** if this address is prefetchable for caches */ + val prefetchable = Bool() +} + +class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) { + val ppn = UInt(ppnBits.W) + + /** pte.u user */ + val u = Bool() + + /** pte.g global */ + val g = Bool() + + /** access exception. + * D$ -> PTW -> TLB AE + * Alignment failed. 
+ */ + val ae_ptw = Bool() + val ae_final = Bool() + val ae_stage2 = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** supervisor write */ + val sw = Bool() + + /** supervisor execute */ + val sx = Bool() + + /** supervisor read */ + val sr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor excute */ + val hx = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** prot_w */ + val pw = Bool() + + /** prot_x */ + val px = Bool() + + /** prot_r */ + val pr = Bool() + + /** PutPartial */ + val ppp = Bool() + + /** AMO logical */ + val pal = Bool() + + /** AMO arithmetic */ + val paa = Bool() + + /** get/put effects */ + val eff = Bool() + + /** cacheable */ + val c = Bool() + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() +} + +/** basic cell for TLB data */ +class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) + extends CoreBundle()(p) { + require(nSectors == 1 || !superpage) + require(!superpageOnly || superpage) + + val level = UInt(log2Ceil(pgLevels).W) + + /** use vpn as tag */ + val tag_vpn = UInt(vpnBits.W) + + /** tag in vitualization mode */ + val tag_v = Bool() + + /** entry data */ + val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W)) + + /** valid bit */ + val valid = Vec(nSectors, Bool()) + + /** returns all entry data in this entry */ + def entry_data = data.map(_.asTypeOf(new TLBEntryData)) + + /** returns the index of sector */ + private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2 - 1, 0) + + /** returns the entry data matched with this vpn */ + def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData)) + + /** returns whether a sector hits */ + def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual) + + /** returns whether tag matches vpn */ + def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> 
nSectors.log2) === 0.U) && (tag_v === virtual) + + /** returns hit signal */ + def hit(vpn: UInt, virtual: Bool): Bool = { + if (superpage && usingVM) { + var tagMatch = valid.head && (tag_v === virtual) + for (j <- 0 until pgLevels) { + val base = (pgLevels - 1 - j) * pgLevelBits + val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0) + val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B + tagMatch = tagMatch && (ignore || (tag_vpn ^ vpn)(base + n - 1, base) === 0.U) + } + tagMatch + } else { + val idx = sectorIdx(vpn) + valid(idx) && sectorTagMatch(vpn, virtual) + } + } + + /** returns the ppn of the input TLBEntryData */ + def ppn(vpn: UInt, data: TLBEntryData) = { + val supervisorVPNBits = pgLevels * pgLevelBits + if (superpage && usingVM) { + var res = data.ppn >> pgLevelBits * (pgLevels - 1) + for (j <- 1 until pgLevels) { + val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B + res = Cat( + res, + (Mux(ignore, vpn, 0.U) | data.ppn)( + supervisorVPNBits - j * pgLevelBits - 1, + supervisorVPNBits - (j + 1) * pgLevelBits + ) + ) + } + res + } else { + data.ppn + } + } + + /** does the refill + * + * find the target entry with vpn tag + * and replace the target entry with the input entry data + */ + def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = { + this.tag_vpn := vpn + this.tag_v := virtual + this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt) - 1, 0) + + val idx = sectorIdx(vpn) + valid(idx) := true.B + data(idx) := entry.asUInt + } + + def invalidate(): Unit = { valid.foreach(_ := false.B) } + def invalidate(virtual: Bool): Unit = { + for ((v, e) <- valid.zip(entry_data)) + when(tag_v === virtual) { v := false.B } + } + def invalidateVPN(vpn: UInt, virtual: Bool): Unit = { + if (superpage) { + when(hit(vpn, virtual)) { invalidate() } + } else { + when(sectorTagMatch(vpn, virtual)) { + for (((v, e), i) <- (valid.zip(entry_data)).zipWithIndex) + when(tag_v === virtual && 
i.U === sectorIdx(vpn)) { v := false.B } + } + } + // For fragmented superpage mappings, we assume the worst (largest) + // case, and zap entries whose most-significant VPNs match + when(((tag_vpn ^ vpn) >> (pgLevelBits * (pgLevels - 1))) === 0.U) { + for ((v, e) <- valid.zip(entry_data)) + when(tag_v === virtual && e.fragmented_superpage) { v := false.B } + } + } + def invalidateNonGlobal(virtual: Bool): Unit = { + for ((v, e) <- valid.zip(entry_data)) + when(tag_v === virtual && !e.g) { v := false.B } + } +} + +/** TLB config + * + * @param nSets the number of sets of PTE, follow [[ICacheParams.nSets]] + * @param nWays the total number of wayss of PTE, follow [[ICacheParams.nWays]] + * @param nSectors the number of ways in a single PTE TLBEntry + * @param nSuperpageEntries the number of SuperpageEntries + */ +case class TLBConfig( + nSets: Int, + nWays: Int, + nSectors: Int = 4, + nSuperpageEntries: Int = 4) + +/** =Overview= + * [[TLB]] is a TLB template which contains PMA logic and PMP checker. + * + * TLB caches PTE and accelerates the address translation process. + * When tlb miss happens, ask PTW(L2TLB) for Page Table Walk. + * Perform PMP and PMA check during the translation and throw exception if there were any. + * + * ==Cache Structure== + * - Sectored Entry (PTE) + * - set-associative or direct-mapped + * - nsets = [[TLBConfig.nSets]] + * - nways = [[TLBConfig.nWays]] / [[TLBConfig.nSectors]] + * - PTEEntry( sectors = [[TLBConfig.nSectors]] ) + * - LRU(if set-associative) + * + * - Superpage Entry(superpage PTE) + * - fully associative + * - nsets = [[TLBConfig.nSuperpageEntries]] + * - PTEEntry(sectors = 1) + * - PseudoLRU + * + * - Special Entry(PTE across PMP) + * - nsets = 1 + * - PTEEntry(sectors = 1) + * + * ==Address structure== + * {{{ + * |vaddr | + * |ppn/vpn | pgIndex | + * | | | + * | |nSets |nSector | |}}} + * + * ==State Machine== + * {{{ + * s_ready: ready to accept request from CPU. 
+ * s_request: when L1TLB(this) miss, send request to PTW(L2TLB), . + * s_wait: wait for PTW to refill L1TLB. + * s_wait_invalidate: L1TLB is waiting for respond from PTW, but L1TLB will invalidate respond from PTW.}}} + * + * ==PMP== + * pmp check + * - special_entry: always check + * - other entry: check on refill + * + * ==Note== + * PMA consume diplomacy parameter generate physical memory address checking logic + * + * Boom use Rocket ITLB, and its own DTLB. + * + * Accelerators:{{{ + * sha3: DTLB + * gemmini: DTLB + * hwacha: DTLB*2+ITLB}}} + * @param instruction true for ITLB, false for DTLB + * @param lgMaxSize @todo seems granularity + * @param cfg [[TLBConfig]] + * @param edge collect SoC metadata. + */ +class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) + extends CoreModule()(p) { + val io = IO(new Bundle { + + /** request from Core */ + val req = Flipped(Decoupled(new TLBReq(lgMaxSize))) + + /** response to Core */ + val resp = Output(new TLBResp()) + + /** SFence Input */ + val sfence = Flipped(Valid(new SFenceReq)) + + /** IO to PTW */ + val ptw = new TLBPTWIO + + /** suppress a TLB refill, one cycle after a miss */ + val kill = Input(Bool()) + }) + io.ptw.customCSRs := DontCare + + val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits) + val vpn = io.req.bits.vaddr(vaddrBits - 1, pgIdxBits) + + /** index for sectored_Entry */ + val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) + + /** TLB Entry */ + val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false)))) + + /** Superpage Entry */ + val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true))) + + /** Special Entry + * + * If PMP granularity is less than page size, thus need additional "special" entry manage PMP. 
+ */ + val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false))) + def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries + def all_entries = ordinary_entries ++ special_entry + def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry + + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) + val state = RegInit(s_ready) + // use vpn as refill_tag + val r_refill_tag = Reg(UInt(vpnBits.W)) + val r_superpage_repl_addr = Reg(UInt(log2Ceil(superpage_entries.size).W)) + val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries.head.size).W)) + val r_sectored_hit = Reg(Valid(UInt(log2Ceil(sectored_entries.head.size).W))) + val r_superpage_hit = Reg(Valid(UInt(log2Ceil(superpage_entries.size).W))) + val r_vstage1_en = Reg(Bool()) + val r_stage2_en = Reg(Bool()) + val r_need_gpa = Reg(Bool()) + val r_gpa_valid = Reg(Bool()) + val r_gpa = Reg(UInt(vaddrBits.W)) + val r_gpa_vpn = Reg(UInt(vpnBits.W)) + val r_gpa_is_pte = Reg(Bool()) + + /** privilege mode */ + val priv = io.req.bits.prv + val priv_v = usingHypervisor.B && io.req.bits.v + val priv_s = priv(0) + // user mode and supervisor mode + val priv_uses_vm = priv <= PRV.S.U + val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr) + val stage1_en = usingVM.B && satp.mode(satp.mode.getWidth - 1) + + /** VS-stage translation enable */ + val vstage1_en = usingHypervisor.B && priv_v && io.ptw.vsatp.mode(io.ptw.vsatp.mode.getWidth - 1) + + /** G-stage translation enable */ + val stage2_en = usingHypervisor.B && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth - 1) + + /** Enable Virtual Memory when: + * 1. statically configured + * 1. satp highest bits enabled + * i. RV32: + * - 0 -> Bare + * - 1 -> SV32 + * i. RV64: + * - 0000 -> Bare + * - 1000 -> SV39 + * - 1001 -> SV48 + * - 1010 -> SV57 + * - 1011 -> SV64 + * 1. In virtualization mode, vsatp highest bits enabled + * 1. priv mode in U and S. + * 1. in H & M mode, disable VM. 
+ * 1. no passthrough(micro-arch defined.) + * + * @see RV-priv spec 4.1.11 Supervisor Address Translation and Protection (satp) Register + * @see RV-priv spec 8.2.18 Virtual Supervisor Address Translation and Protection Register (vsatp) + */ + val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough + + // flush guest entries on vsatp.MODE Bare <-> SvXX transitions + val v_entries_use_stage1 = RegInit(false.B) + val vsatp_mode_mismatch = priv_v && (vstage1_en =/= v_entries_use_stage1) && !io.req.bits.passthrough + + // share a single physical memory attribute checker (unshare if critical path) + val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits - 1, 0) + + /** refill signal */ + val do_refill = usingVM.B && io.ptw.resp.valid + + /** sfence invalidate refill */ + val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid + // PMP + val mpu_ppn = Mux( + do_refill, + refill_ppn, + Mux( + vm_enabled && special_entry.nonEmpty.B, + special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), + io.req.bits.vaddr >> pgIdxBits + ) + ) + val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) + val mpu_priv = + Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */ ), PRV.S.U, Cat(io.ptw.status.debug, priv)) + val pmp = Module(new PMPChecker(lgMaxSize)) + pmp.io.addr := mpu_physaddr + pmp.io.size := io.req.bits.size + pmp.io.pmp := (io.ptw.pmp: Seq[PMP]) + pmp.io.prv := mpu_priv + // PMA + // check exist a slave can consume this address. + val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_ || _) + // check utility to help check SoC property. + def fastCheck(member: TLManagerParameters => Boolean) = + legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b: Boolean) => b.B) + // todo: using DataScratchpad doesn't support cacheable. 
+ val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B + val homogeneous = + TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous + // In M mode, if access DM address(debug module program buffer) + val deny_access_to_debug = + mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) + val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r + val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w + val prot_pp = fastCheck(_.supportsPutPartial) + val prot_al = fastCheck(_.supportsLogical) + val prot_aa = fastCheck(_.supportsArithmetic) + val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x + val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType) + + // hit check + val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v)) + val superpage_hits = superpage_entries.map(_.hit(vpn, priv_v)) + val hitsVec = all_entries.map(vm_enabled && _.hit(vpn, priv_v)) + val real_hits = hitsVec.asUInt + val hits = Cat(!vm_enabled, real_hits) + + // use ptw response to refill + // permission bit arrays + when(do_refill) { + val pte = io.ptw.resp.bits.pte + val refill_v = r_vstage1_en || r_stage2_en + val newEntry = Wire(new TLBEntryData) + newEntry.ppn := pte.ppn + newEntry.c := cacheable + newEntry.u := pte.u + newEntry.g := pte.g && pte.v + newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw + newEntry.ae_final := io.ptw.resp.bits.ae_final + newEntry.ae_stage2 := io.ptw.resp.bits.ae_final && io.ptw.resp.bits.gpa_is_pte && r_stage2_en + newEntry.pf := io.ptw.resp.bits.pf + newEntry.gf := io.ptw.resp.bits.gf + newEntry.hr := io.ptw.resp.bits.hr + newEntry.hw := io.ptw.resp.bits.hw + newEntry.hx := io.ptw.resp.bits.hx + newEntry.sr := pte.sr() + newEntry.sw := pte.sw() + newEntry.sx := pte.sx() + newEntry.pr := prot_r + newEntry.pw 
:= prot_w + newEntry.px := prot_x + newEntry.ppp := prot_pp + newEntry.pal := prot_al + newEntry.paa := prot_aa + newEntry.eff := prot_eff + newEntry.fragmented_superpage := io.ptw.resp.bits.fragmented_superpage + // refill special_entry + when(special_entry.nonEmpty.B && !io.ptw.resp.bits.homogeneous) { + special_entry.foreach(_.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry)) + }.elsewhen(io.ptw.resp.bits.level < (pgLevels - 1).U) { + val waddr = Mux(r_superpage_hit.valid && usingHypervisor.B, r_superpage_hit.bits, r_superpage_repl_addr) + for ((e, i) <- superpage_entries.zipWithIndex) when(r_superpage_repl_addr === i.U) { + e.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry) + when(invalidate_refill) { e.invalidate() } + } + // refill sectored_hit + }.otherwise { + val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) + val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr) + for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when(waddr === i.U) { + when(!r_sectored_hit.valid) { e.invalidate() } + e.insert(r_refill_tag, refill_v, 0.U, newEntry) + when(invalidate_refill) { e.invalidate() } + } + } + + r_gpa_valid := io.ptw.resp.bits.gpa.valid + r_gpa := io.ptw.resp.bits.gpa.bits + r_gpa_is_pte := io.ptw.resp.bits.gpa_is_pte + } + + // get all entries data. + val entries = all_entries.map(_.getData(vpn)) + val normal_entries = entries.take(ordinary_entries.size) + // parallel query PPN from [[all_entries]], if VM not enabled return VPN instead + val ppn = Mux1H( + hitsVec :+ !vm_enabled, + (all_entries.zip(entries)).map { case (entry, data) => entry.ppn(vpn, data) } :+ vpn(ppnBits - 1, 0) + ) + + val nPhysicalEntries = 1 + special_entry.size + // generally PTW misaligned load exception. 
+ val ptw_ae_array = Cat(false.B, entries.map(_.ae_ptw).asUInt) + val final_ae_array = Cat(false.B, entries.map(_.ae_final).asUInt) + val ptw_pf_array = Cat(false.B, entries.map(_.pf).asUInt) + val ptw_gf_array = Cat(false.B, entries.map(_.gf).asUInt) + val sum = Mux(priv_v, io.ptw.gstatus.sum, io.ptw.status.sum) + // if in hypervisor/machine mode, cannot read/write user entries. + // if in superviosr/user mode, "If the SUM bit in the sstatus register is set, supervisor mode software may also access pages with U=1.(from spec)" + val priv_rw_ok = Mux(!priv_s || sum, entries.map(_.u).asUInt, 0.U) | Mux(priv_s, ~entries.map(_.u).asUInt, 0.U) + // if in hypervisor/machine mode, other than user pages, all pages are executable. + // if in superviosr/user mode, only user page can execute. + val priv_x_ok = Mux(priv_s, ~entries.map(_.u).asUInt, entries.map(_.u).asUInt) + val stage1_bypass = + Fill(entries.size, usingHypervisor.B) & (Fill(entries.size, !stage1_en) | entries.map(_.ae_stage2).asUInt) + val mxr = io.ptw.status.mxr | Mux(priv_v, io.ptw.gstatus.mxr, false.B) + // "The vsstatus field MXR, which makes execute-only pages readable, only overrides VS-stage page protection.(from spec)" + val r_array = + Cat(true.B, (priv_rw_ok & (entries.map(_.sr).asUInt | Mux(mxr, entries.map(_.sx).asUInt, 0.U))) | stage1_bypass) + val w_array = Cat(true.B, (priv_rw_ok & entries.map(_.sw).asUInt) | stage1_bypass) + val x_array = Cat(true.B, (priv_x_ok & entries.map(_.sx).asUInt) | stage1_bypass) + val stage2_bypass = Fill(entries.size, !stage2_en) + val hr_array = + Cat(true.B, entries.map(_.hr).asUInt | Mux(io.ptw.status.mxr, entries.map(_.hx).asUInt, 0.U) | stage2_bypass) + val hw_array = Cat(true.B, entries.map(_.hw).asUInt | stage2_bypass) + val hx_array = Cat(true.B, entries.map(_.hx).asUInt | stage2_bypass) + // These array is for each TLB entries. 
+ // user mode can read: PMA OK, TLB OK, AE OK + val pr_array = Cat(Fill(nPhysicalEntries, prot_r), normal_entries.map(_.pr).asUInt) & ~(ptw_ae_array | final_ae_array) + // user mode can write: PMA OK, TLB OK, AE OK + val pw_array = Cat(Fill(nPhysicalEntries, prot_w), normal_entries.map(_.pw).asUInt) & ~(ptw_ae_array | final_ae_array) + // user mode can write: PMA OK, TLB OK, AE OK + val px_array = Cat(Fill(nPhysicalEntries, prot_x), normal_entries.map(_.px).asUInt) & ~(ptw_ae_array | final_ae_array) + // put effect + val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt) + // cacheable + val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt) + // put partial + val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt) + // atomic arithmetic + val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), normal_entries.map(_.paa).asUInt) + // atomic logic + val pal_array = Cat(Fill(nPhysicalEntries, prot_al), normal_entries.map(_.pal).asUInt) + val ppp_array_if_cached = ppp_array | c_array + val paa_array_if_cached = paa_array | (if (usingAtomicsInCache) c_array else 0.U) + val pal_array_if_cached = pal_array | (if (usingAtomicsInCache) c_array else 0.U) + val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries - 1), normal_entries.map(_.c).asUInt) + + // vaddr misaligned: vaddr[1:0]=b00 + val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR + def badVA(guestPA: Boolean): Bool = { + val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels + val extraBits = if (guestPA) hypervisorExtraAddrBits else 0 + val signed = !guestPA + val nPgLevelChoices = pgLevels - minPgLevels + 1 + val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits + (for (i <- 0 until nPgLevelChoices) yield { + val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U + val 
maskedVAddr = io.req.bits.vaddr & mask + additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) + }).orR + } + val bad_gpa = + if (!usingHypervisor) false.B + else vm_enabled && !stage1_en && badVA(true) + val bad_va = + if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B + else vm_enabled && stage1_en && badVA(false) + + val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC) + val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd) + val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd) + val cmd_put_partial = io.req.bits.cmd === M_PWR + val cmd_read = isRead(io.req.bits.cmd) + val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX + val cmd_write = isWrite(io.req.bits.cmd) + val cmd_write_perms = cmd_write || + io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions + + val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array) + val ae_array = + Mux(misaligned, eff_array, 0.U) | + Mux(cmd_lrsc, ~lrscAllowed, 0.U) + + // access exception needs SoC information from PMA + val ae_ld_array = Mux(cmd_read, ae_array | ~pr_array, 0.U) + val ae_st_array = + Mux(cmd_write_perms, ae_array | ~pw_array, 0.U) | + Mux(cmd_put_partial, ~ppp_array_if_cached, 0.U) | + Mux(cmd_amo_logical, ~pal_array_if_cached, 0.U) | + Mux(cmd_amo_arithmetic, ~paa_array_if_cached, 0.U) + val must_alloc_array = + Mux(cmd_put_partial, ~ppp_array, 0.U) | + Mux(cmd_amo_logical, ~pal_array, 0.U) | + Mux(cmd_amo_arithmetic, ~paa_array, 0.U) | + Mux(cmd_lrsc, ~0.U(pal_array.getWidth.W), 0.U) + val pf_ld_array = + Mux(cmd_read, ((~Mux(cmd_readx, x_array, r_array) & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U) + val pf_st_array = Mux(cmd_write_perms, ((~w_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U) + val pf_inst_array = ((~x_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array + val gf_ld_array 
= Mux(priv_v && cmd_read, ~Mux(cmd_readx, hx_array, hr_array) & ~ptw_ae_array, 0.U) + val gf_st_array = Mux(priv_v && cmd_write_perms, ~hw_array & ~ptw_ae_array, 0.U) + val gf_inst_array = Mux(priv_v, ~hx_array & ~ptw_ae_array, 0.U) + + val gpa_hits = { + val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array + val hit_mask = Fill(ordinary_entries.size, r_gpa_valid && r_gpa_vpn === vpn) | Fill(all_entries.size, !vstage1_en) + hit_mask | ~need_gpa_mask(all_entries.size - 1, 0) + } + + val tlb_hit_if_not_gpa_miss = real_hits.orR + val tlb_hit = (real_hits & gpa_hits).orR + // leads to s_request + val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit + + val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries.head.size, "plru") + val superpage_plru = new PseudoLRU(superpage_entries.size) + when(io.req.valid && vm_enabled) { + // replace + when(sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) } + when(superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } + } + + // Superpages create the possibility that two entries in the TLB may match. + // This corresponds to a software bug, but we can't return complete garbage; + // we must return either the old translation or the new translation. This + // isn't compatible with the Mux1H approach. So, flush the TLB and report + // a miss on duplicate entries. + val multipleHits = PopCountAtLeast(real_hits, 2) + + // only pull up req.ready when this is s_ready state. 
+ io.req.ready := state === s_ready + // page fault + io.resp.pf.ld := (bad_va && cmd_read) || (pf_ld_array & hits).orR + io.resp.pf.st := (bad_va && cmd_write_perms) || (pf_st_array & hits).orR + io.resp.pf.inst := bad_va || (pf_inst_array & hits).orR + // guest page fault + io.resp.gf.ld := (bad_gpa && cmd_read) || (gf_ld_array & hits).orR + io.resp.gf.st := (bad_gpa && cmd_write_perms) || (gf_st_array & hits).orR + io.resp.gf.inst := bad_gpa || (gf_inst_array & hits).orR + // access exception + io.resp.ae.ld := (ae_ld_array & hits).orR + io.resp.ae.st := (ae_st_array & hits).orR + io.resp.ae.inst := (~px_array & hits).orR + // misaligned + io.resp.ma.ld := misaligned && cmd_read + io.resp.ma.st := misaligned && cmd_write + io.resp.ma.inst := false.B // this is up to the pipeline to figure out + io.resp.cacheable := (c_array & hits).orR + io.resp.must_alloc := (must_alloc_array & hits).orR + io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers + .forall(m => !m.supportsAcquireB || m.supportsHint) + .B + io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits + io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) + io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte + io.resp.gpa := { + val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits) + val offset = Mux(io.resp.gpa_is_pte, r_gpa(pgIdxBits - 1, 0), io.req.bits.vaddr(pgIdxBits - 1, 0)) + Cat(page, offset) + } + + io.ptw.req.valid := state === s_request + io.ptw.req.bits.valid := !io.kill + io.ptw.req.bits.bits.addr := r_refill_tag + io.ptw.req.bits.bits.vstage1 := r_vstage1_en + io.ptw.req.bits.bits.stage2 := r_stage2_en + io.ptw.req.bits.bits.need_gpa := r_need_gpa + + if (usingVM) { + when(io.ptw.req.fire && io.ptw.req.bits.valid) { + r_gpa_valid := false.B + r_gpa_vpn := r_refill_tag + } + + val sfence = io.sfence.valid + // this is [[s_ready]] + // handle miss/hit at the first cycle. + // if miss, request PTW(L2TLB). 
+ when(io.req.fire && tlb_miss) { + state := s_request + r_refill_tag := vpn + r_need_gpa := tlb_hit_if_not_gpa_miss + r_vstage1_en := vstage1_en + r_stage2_en := stage2_en + r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way) + r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx)) + r_sectored_hit.valid := sector_hits.orR + r_sectored_hit.bits := OHToUInt(sector_hits) + r_superpage_hit.valid := superpage_hits.orR + r_superpage_hit.bits := OHToUInt(superpage_hits) + } + // Handle SFENCE.VMA when send request to PTW. + // SFENCE.VMA io.ptw.req.ready kill + // ? ? 1 + // 0 0 0 + // 0 1 0 -> s_wait + // 1 0 0 -> s_wait_invalidate + // 1 0 0 -> s_ready + when(state === s_request) { + // SFENCE.VMA will kill TLB entries based on rs1 and rs2. It will take 1 cycle. + when(sfence) { state := s_ready } + // here should be io.ptw.req.fire, but assert(io.ptw.req.ready === true.B) + // fire -> s_wait + when(io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) } + // If CPU kills request(frontend.s2_redirect) + when(io.kill) { state := s_ready } + } + // sfence in refill will results in invalidate + when(state === s_wait && sfence) { + state := s_wait_invalidate + } + // after CPU acquire response, go back to s_ready. + when(io.ptw.resp.valid) { + state := s_ready + } + + // SFENCE processing logic. 
+ when(sfence) { + assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn) + for (e <- all_real_entries) { + val hv = usingHypervisor.B && io.sfence.bits.hv + val hg = usingHypervisor.B && io.sfence.bits.hg + when(!hg && io.sfence.bits.rs1) { e.invalidateVPN(vpn, hv) } + .elsewhen(!hg && io.sfence.bits.rs2) { e.invalidateNonGlobal(hv) } + .otherwise { e.invalidate(hv || hg) } + } + } + when(io.req.fire && vsatp_mode_mismatch) { + all_real_entries.foreach(_.invalidate(true.B)) + v_entries_use_stage1 := vstage1_en + } + when(multipleHits || reset.asBool) { + all_real_entries.foreach(_.invalidate()) + } + + ccover(io.ptw.req.fire, "MISS", "TLB miss") + ccover(io.ptw.req.valid && !io.ptw.req.ready, "PTW_STALL", "TLB miss, but PTW busy") + ccover(state === s_wait_invalidate, "SFENCE_DURING_REFILL", "flush TLB during TLB refill") + ccover(sfence && !io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_ALL", "flush TLB") + ccover(sfence && !io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_ASID", "flush TLB ASID") + ccover(sfence && io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_LINE", "flush TLB line") + ccover(sfence && io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_LINE_ASID", "flush TLB line/ASID") + ccover(multipleHits, "MULTIPLE_HITS", "Two matching translations in TLB") + } + + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc) + + /** Decides which entry to be replaced + * + * If there is a invalid entry, replace it with priorityencoder; + * if not, replace the alt entry + * + * @return mask for TLBEntry replacement + */ + def replacementEntry(set: Seq[TLBEntry], alt: UInt) = { + val valids = set.map(_.valid.orR).asUInt + Mux(valids.andR, alt, PriorityEncoder(~valids)) + } +} From 1f9906f139f41d221f2b972e6af295b1413fc38d Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 16:36:37 +0800 Subject: 
[PATCH 068/140] [rocketv] migrate TLB --- rocketv/src/Bundle.scala | 243 ++++++++++++- rocketv/src/TLB.scala | 751 ++++++++++++++++----------------------- 2 files changed, 539 insertions(+), 455 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 741b297ad..48a7fab46 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -1041,7 +1041,6 @@ class IntToFPInput(xLen: Int) extends Bundle { val in1 = UInt(xLen.W) } - class FPUCoreIO(hartIdLen: Int, xLen: Int, fLen: Int) extends Bundle { val hartid = Input(UInt(hartIdLen.W)) val time = Input(UInt(xLen.W)) @@ -1073,3 +1072,245 @@ class FPUCoreIO(hartIdLen: Int, xLen: Int, fLen: Int) extends Bundle { val keep_clock_enabled = Input(Bool()) } + +class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int)() extends Bundle { + // TODO: remove it. + val M_SZ = 5 + + /** request address from CPU. */ + val vaddr = UInt(vaddrBitsExtended.W) + + /** don't lookup TLB, bypass vaddr as paddr */ + val passthrough = Bool() + + /** granularity */ + val size = UInt(log2Ceil(lgMaxSize + 1).W) + + /** memory command. 
*/ + val cmd = UInt(M_SZ.W) + val prv = UInt(PRV.SZ.W) + + /** virtualization mode */ + val v = Bool() + +} + +class TLBResp(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle { + // lookup responses + val miss = Bool() + + /** physical address */ + val paddr = UInt(paddrBits.W) + val gpa = UInt(vaddrBitsExtended.W) + val gpa_is_pte = Bool() + + /** page fault exception */ + val pf = new TLBExceptions + + /** guest page fault exception */ + val gf = new TLBExceptions + + /** access exception */ + val ae = new TLBExceptions + + /** misaligned access exception */ + val ma = new TLBExceptions + + /** if this address is cacheable */ + val cacheable = Bool() + + /** if caches must allocate this address */ + val must_alloc = Bool() + + /** if this address is prefetchable for caches */ + val prefetchable = Bool() +} + +class TLBExceptions extends Bundle { + val ld = Bool() + val st = Bool() + val inst = Bool() +} + +object TLBEntry { + + /** returns all entry data in this entry */ + def entry_data(tlbEntry: TLBEntry) = tlbEntry.data.map(_.asTypeOf(new TLBEntryData(tlbEntry.ppnBits))) + + /** returns the index of sector */ + private def sectorIdx(tlbEntry: TLBEntry, vpn: UInt) = vpn(log2Ceil(tlbEntry.nSectors) - 1, 0) + + /** returns the entry data matched with this vpn */ + def getData(tlbEntry: TLBEntry, vpn: UInt) = tlbEntry.data(sectorIdx(tlbEntry, vpn)).asTypeOf(new TLBEntryData(tlbEntry.ppnBits)) + + /** returns whether a sector hits */ + def sectorHit(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool) = tlbEntry.valid.asUInt.orR && sectorTagMatch(tlbEntry, vpn, virtual) + + /** returns whether tag matches vpn */ + def sectorTagMatch(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool) = (((tlbEntry.tag_vpn ^ vpn) >> log2Ceil(tlbEntry.nSectors)) === 0.U) && (tlbEntry.tag_v === virtual) + + /** returns hit signal */ + def hit(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage: Boolean, superpageOnly: 
Boolean): Bool = { + if (superpage && usingVM) { + var tagMatch = tlbEntry.valid.head && (tlbEntry.tag_v === virtual) + for (j <- 0 until tlbEntry.pgLevels) { + val base = (tlbEntry.pgLevels - 1 - j) * pgLevelBits + val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0) + val ignore = tlbEntry.level < j.U || (superpageOnly && (j == (tlbEntry.pgLevels - 1))).B + tagMatch = tagMatch && (ignore || (tlbEntry.tag_vpn ^ vpn)(base + n - 1, base) === 0.U) + } + tagMatch + } else { + val idx = sectorIdx(tlbEntry, vpn) + tlbEntry.valid(idx) && sectorTagMatch(tlbEntry, vpn, virtual) + } + } + + /** returns the ppn of the input TLBEntryData */ + def ppn(tlbEntry: TLBEntry, vpn: UInt, data: TLBEntryData, usingVM: Boolean, pgLevelBits: Int, superpage: Boolean, superpageOnly: Boolean) = { + val supervisorVPNBits = tlbEntry.pgLevels * pgLevelBits + if (superpage && usingVM) { + var res = data.ppn >> pgLevelBits * (tlbEntry.pgLevels - 1) + for (j <- 1 until tlbEntry.pgLevels) { + val ignore = tlbEntry.level < j.U || (superpageOnly && j == tlbEntry.pgLevels - 1).B + res = Cat( + res, + (Mux(ignore, vpn, 0.U) | data.ppn)( + supervisorVPNBits - j * pgLevelBits - 1, + supervisorVPNBits - (j + 1) * pgLevelBits + ) + ) + } + res + } else { + data.ppn + } + } + + /** does the refill + * + * find the target entry with vpn tag + * and replace the target entry with the input entry data + */ + def insert(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData, superpageOnly: Boolean): Unit = { + tlbEntry.tag_vpn := vpn + tlbEntry.tag_v := virtual + tlbEntry.level := level(log2Ceil(tlbEntry.pgLevels - (if (superpageOnly) 1 else 0)) - 1, 0) + + val idx = sectorIdx(tlbEntry, vpn) + tlbEntry.valid(idx) := true.B + tlbEntry.data(idx) := entry.asUInt + } + + def invalidate(tlbEntry: TLBEntry): Unit = { tlbEntry.valid.foreach(_ := false.B) } + def invalidate(tlbEntry: TLBEntry, virtual: Bool): Unit = { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + 
when(tlbEntry.tag_v === virtual) { v := false.B } + } + def invalidateVPN(tlbEntry: TLBEntry, vpn: UInt, virtual: Bool, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage: Boolean, superpageOnly: Boolean): Unit = { + if (superpage) { + when(hit(tlbEntry, vpn, virtual, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage, superpageOnly)) { invalidate(tlbEntry) } + } else { + when(sectorTagMatch(tlbEntry, vpn, virtual)) { + for (((v, e), i) <- (tlbEntry.valid.zip(entry_data(tlbEntry))).zipWithIndex) + when(tlbEntry.tag_v === virtual && i.U === sectorIdx(tlbEntry, vpn)) { v := false.B } + } + } + // For fragmented superpage mappings, we assume the worst (largest) + // case, and zap entries whose most-significant VPNs match + when(((tlbEntry.tag_vpn ^ vpn) >> (pgLevelBits * (tlbEntry.pgLevels - 1))) === 0.U) { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + when(tlbEntry.tag_v === virtual && e.fragmented_superpage) { v := false.B } + } + } + def invalidateNonGlobal(tlbEntry: TLBEntry, virtual: Bool): Unit = { + for ((v, e) <- tlbEntry.valid.zip(entry_data(tlbEntry))) + when(tlbEntry.tag_v === virtual && !e.g) { v := false.B } + } +} + +class TLBEntry(val nSectors: Int, val pgLevels: Int, vpnBits: Int, val ppnBits: Int) extends Bundle { + + val level = UInt(log2Ceil(pgLevels).W) + + /** use vpn as tag */ + val tag_vpn = UInt(vpnBits.W) + + /** tag in vitualization mode */ + val tag_v = Bool() + + /** entry data */ + val data = Vec(nSectors, UInt(new TLBEntryData(ppnBits).getWidth.W)) + + /** valid bit */ + val valid = Vec(nSectors, Bool()) +} + +class TLBEntryData(ppnBits: Int) extends Bundle { + val ppn = UInt(ppnBits.W) + + /** pte.u user */ + val u = Bool() + + /** pte.g global */ + val g = Bool() + + /** access exception. + * D$ -> PTW -> TLB AE + * Alignment failed. 
+ */ + val ae_ptw = Bool() + val ae_final = Bool() + val ae_stage2 = Bool() + + /** page fault */ + val pf = Bool() + + /** guest page fault */ + val gf = Bool() + + /** supervisor write */ + val sw = Bool() + + /** supervisor execute */ + val sx = Bool() + + /** supervisor read */ + val sr = Bool() + + /** hypervisor write */ + val hw = Bool() + + /** hypervisor excute */ + val hx = Bool() + + /** hypervisor read */ + val hr = Bool() + + /** prot_w */ + val pw = Bool() + + /** prot_x */ + val px = Bool() + + /** prot_r */ + val pr = Bool() + + /** PutPartial */ + val ppp = Bool() + + /** AMO logical */ + val pal = Bool() + + /** AMO arithmetic */ + val paa = Bool() + + /** get/put effects */ + val eff = Bool() + + /** cacheable */ + val c = Bool() + + /** fragmented_superpage support */ + val fragmented_superpage = Bool() +} diff --git a/rocketv/src/TLB.scala b/rocketv/src/TLB.scala index 3fc9dec2a..3ea02f4f9 100644 --- a/rocketv/src/TLB.scala +++ b/rocketv/src/TLB.scala @@ -1,401 +1,228 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. 
- -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util._ - -import org.chipsalliance.cde.config.{Field, Parameters} -import freechips.rocketchip.subsystem.CacheBlockBytes -import freechips.rocketchip.diplomacy.RegionType -import freechips.rocketchip.tile.{CoreBundle, CoreModule} -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property -import freechips.rocketchip.devices.debug.DebugModuleKey -import chisel3.experimental.SourceInfo - -case object PgLevels extends Field[Int](2) -case object ASIdBits extends Field[Int](0) -case object VMIdBits extends Field[Int](0) - -/** =SFENCE= - * rs1 rs2 - * {{{ - * 0 0 -> flush All - * 0 1 -> flush by ASID - * 1 1 -> flush by ADDR - * 1 0 -> flush by ADDR and ASID - * }}} - * {{{ - * If rs1=x0 and rs2=x0, the fence orders all reads and writes made to any level of the page tables, for all address spaces. - * If rs1=x0 and rs2!=x0, the fence orders all reads and writes made to any level of the page tables, but only for the address space identified by integer register rs2. Accesses to global mappings (see Section 4.3.1) are not ordered. - * If rs1!=x0 and rs2=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for all address spaces. - * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered. 
- * }}} - */ -class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { - val rs1 = Bool() - val rs2 = Bool() - val addr = UInt(vaddrBits.W) - val asid = UInt((asIdBits.max(1)).W) // TODO zero-width - val hv = Bool() - val hg = Bool() -} - -class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) { - - /** request address from CPU. */ - val vaddr = UInt(vaddrBitsExtended.W) - - /** don't lookup TLB, bypass vaddr as paddr */ - val passthrough = Bool() - - /** granularity */ - val size = UInt(log2Ceil(lgMaxSize + 1).W) - - /** memory command. */ - val cmd = Bits(M_SZ.W) - val prv = UInt(PRV.SZ.W) - - /** virtualization mode */ - val v = Bool() - -} +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, Decoupled, Enum, Fill, Mux1H, OHToUInt, PopCount, PriorityEncoder, UIntToOH, Valid, log2Ceil} -class TLBExceptions extends Bundle { - val ld = Bool() - val st = Bool() - val inst = Bool() +object TLBParameter { + implicit def rwP: upickle.default.ReadWriter[TLBParameter] = upickle.default.macroRW[TLBParameter] } -class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { - // lookup responses - val miss = Bool() - - /** physical address */ - val paddr = UInt(paddrBits.W) - val gpa = UInt(vaddrBitsExtended.W) - val gpa_is_pte = Bool() +case class TLBParameter( + useAsyncReset: Boolean, + xLen: Int, + nSets: Int, + nWays: Int, + nSectors: Int, + nSuperpageEntries: Int, + asidBits: Int, + pgLevels: Int, + usingHypervisor: Boolean, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingAtomicsOnlyForIO: Boolean, + usingVM: Boolean, + usingAtomicsInCache: Boolean, + nPMPs: Int, + pmaCheckerParameter: PMACheckerParameter, + paddrBits: Int, + isITLB: Boolean + ) extends SerializableModuleParameter { + require(nWays > nSectors, s"nWays: ${nWays} > nSectors: ${nSectors}") + // D$: log2Ceil(coreDataBytes), I$: 
log2Ceil(fetchBytes) + def lgMaxSize = log2Ceil(xLen / 8) + + def pmpCheckerParameter: PMPCheckerParameter = PMPCheckerParameter(nPMPs, paddrBits, lgMaxSize, pmpGranularity) + + def vpnBits: Int = vaddrBits - pgIdxBits + + def ppnBits: Int = paddrBits - pgIdxBits + + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1).min(xLen) + } - /** page fault exception */ - val pf = new TLBExceptions + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } - /** guest page fault exception */ - val gf = new TLBExceptions + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) - /** access exception */ - val ae = new TLBExceptions + def maxHypervisorExtraAddrBits: Int = 2 - /** misaligned access exception */ - val ma = new TLBExceptions + def hypervisorExtraAddrBits: Int = { + if (usingHypervisor) maxHypervisorExtraAddrBits + else 0 + } - /** if this address is cacheable */ - val cacheable = Bool() + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } - /** if caches must allocate this address */ - val must_alloc = Bool() + def pgIdxBits: Int = 12 - /** if this address is prefetchable for caches */ - val prefetchable = Bool() + def pmpGranularity: Int = if (usingHypervisor) 4096 else 4 } -class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) { - val ppn = UInt(ppnBits.W) - - /** pte.u user */ - val u = Bool() - - /** pte.g global */ - val g = Bool() - - /** 
access exception. - * D$ -> PTW -> TLB AE - * Alignment failed. - */ - val ae_ptw = Bool() - val ae_final = Bool() - val ae_stage2 = Bool() - - /** page fault */ - val pf = Bool() - - /** guest page fault */ - val gf = Bool() - - /** supervisor write */ - val sw = Bool() - - /** supervisor execute */ - val sx = Bool() - - /** supervisor read */ - val sr = Bool() - - /** hypervisor write */ - val hw = Bool() - - /** hypervisor excute */ - val hx = Bool() - - /** hypervisor read */ - val hr = Bool() - - /** prot_w */ - val pw = Bool() - - /** prot_x */ - val px = Bool() - - /** prot_r */ - val pr = Bool() - - /** PutPartial */ - val ppp = Bool() - - /** AMO logical */ - val pal = Bool() - - /** AMO arithmetic */ - val paa = Bool() - - /** get/put effects */ - val eff = Bool() - - /** cacheable */ - val c = Bool() +class TLBInterface(parameter: TLBParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + + /** request from Core */ + val req = Flipped(Decoupled(new TLBReq(parameter.lgMaxSize, parameter.vaddrBitsExtended))) + + /** response to Core */ + val resp = Output(new TLBResp(parameter.paddrBits, parameter.vaddrBitsExtended)) + + /** SFence Input */ + val sfence = Flipped(Valid(new SFenceReq(parameter.vaddrBits, parameter.asidBits))) + + /** IO to PTW */ + val ptw = new TLBPTWIO( + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.vaddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits + ) - /** fragmented_superpage support */ - val fragmented_superpage = Bool() + /** suppress a TLB refill, one cycle after a miss */ + val kill = Input(Bool()) } -/** basic cell for TLB data */ -class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) - extends CoreBundle()(p) { - require(nSectors == 1 || !superpage) - require(!superpageOnly || superpage) - - val level = 
UInt(log2Ceil(pgLevels).W) - - /** use vpn as tag */ - val tag_vpn = UInt(vpnBits.W) - - /** tag in vitualization mode */ - val tag_v = Bool() - - /** entry data */ - val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W)) - - /** valid bit */ - val valid = Vec(nSectors, Bool()) - - /** returns all entry data in this entry */ - def entry_data = data.map(_.asTypeOf(new TLBEntryData)) - - /** returns the index of sector */ - private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2 - 1, 0) - - /** returns the entry data matched with this vpn */ - def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData)) - - /** returns whether a sector hits */ - def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual) - - /** returns whether tag matches vpn */ - def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> nSectors.log2) === 0.U) && (tag_v === virtual) - - /** returns hit signal */ - def hit(vpn: UInt, virtual: Bool): Bool = { - if (superpage && usingVM) { - var tagMatch = valid.head && (tag_v === virtual) - for (j <- 0 until pgLevels) { - val base = (pgLevels - 1 - j) * pgLevelBits - val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0) - val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B - tagMatch = tagMatch && (ignore || (tag_vpn ^ vpn)(base + n - 1, base) === 0.U) - } - tagMatch - } else { - val idx = sectorIdx(vpn) - valid(idx) && sectorTagMatch(vpn, virtual) - } +@instantiable +class TLB(val parameter: TLBParameter) + extends FixedIORawModule(new TLBInterface(parameter)) + with SerializableModule[TLBParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val pmpGranularity = parameter.pmpGranularity + val vaddrBits = parameter.vaddrBits + val vaddrBitsExtended = parameter.vaddrBitsExtended + val pgIdxBits = parameter.pgIdxBits + val 
pgLevels = parameter.pgLevels + val minPgLevels = parameter.minPgLevels + val pgLevelBits = parameter.pgLevelBits + val hypervisorExtraAddrBits = parameter.hypervisorExtraAddrBits + val vpnBits = parameter.vpnBits + val ppnBits = parameter.ppnBits + val usingHypervisor = parameter.usingHypervisor + val usingAtomics = parameter.usingAtomics + val usingVM = parameter.usingVM + val usingDataScratchpad = parameter.usingDataScratchpad + val usingAtomicsOnlyForIO = parameter.usingAtomicsOnlyForIO + val instruction = parameter.isITLB + val usingAtomicsInCache = parameter.usingAtomicsInCache + val lgMaxSize = parameter.lgMaxSize + def M_XLR = "b00110".U + def M_XSC = "b00111".U + + def M_XA_SWAP = "b00100".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_ADD = "b01000".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U + def M_PWR = "b10001".U // partial (masked) store + def M_XRD = "b00000".U; // int load + def M_HLVX = "b10000".U // HLVX instruction + def M_XWR = "b00001".U; // int store + def M_FLUSH_ALL = "b00101".U + def M_WOK = "b10111".U // check write permissions but don't perform a write + + // compatibility mode + object cfg { + val nSets: Int = parameter.nSets + val nWays: Int = parameter.nWays + val nSectors: Int = parameter.nSectors + val nSuperpageEntries: Int = parameter.nSuperpageEntries } - - /** returns the ppn of the input TLBEntryData */ - def ppn(vpn: UInt, data: TLBEntryData) = { - val supervisorVPNBits = pgLevels * pgLevelBits - if (superpage && usingVM) { - var res = data.ppn >> pgLevelBits * (pgLevels - 1) - for (j <- 1 until pgLevels) { - val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B - res = Cat( - res, - (Mux(ignore, vpn, 0.U) | data.ppn)( - supervisorVPNBits - j * pgLevelBits - 1, - supervisorVPNBits - (j + 1) * pgLevelBits - ) - ) - } - res - } else { - data.ppn + object PopCountAtLeast { + private def two(x: 
UInt): (Bool, Bool) = x.getWidth match { + case 1 => (x.asBool, false.B) + case n => + val half = x.getWidth / 2 + val (leftOne, leftTwo) = two(x(half - 1, 0)) + val (rightOne, rightTwo) = two(x(x.getWidth - 1, half)) + (leftOne || rightOne, leftTwo || rightTwo || (leftOne && rightOne)) + } + def apply(x: UInt, n: Int): Bool = n match { + case 0 => true.B + case 1 => x.orR + case 2 => two(x)._2 + case 3 => PopCount(x) >= n.U } } - /** does the refill - * - * find the target entry with vpn tag - * and replace the target entry with the input entry data - */ - def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = { - this.tag_vpn := vpn - this.tag_v := virtual - this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt) - 1, 0) - - val idx = sectorIdx(vpn) - valid(idx) := true.B - data(idx) := entry.asUInt - } + // end - def invalidate(): Unit = { valid.foreach(_ := false.B) } - def invalidate(virtual: Bool): Unit = { - for ((v, e) <- valid.zip(entry_data)) - when(tag_v === virtual) { v := false.B } - } - def invalidateVPN(vpn: UInt, virtual: Bool): Unit = { - if (superpage) { - when(hit(vpn, virtual)) { invalidate() } - } else { - when(sectorTagMatch(vpn, virtual)) { - for (((v, e), i) <- (valid.zip(entry_data)).zipWithIndex) - when(tag_v === virtual && i.U === sectorIdx(vpn)) { v := false.B } - } - } - // For fragmented superpage mappings, we assume the worst (largest) - // case, and zap entries whose most-significant VPNs match - when(((tag_vpn ^ vpn) >> (pgLevelBits * (pgLevels - 1))) === 0.U) { - for ((v, e) <- valid.zip(entry_data)) - when(tag_v === virtual && e.fragmented_superpage) { v := false.B } - } - } - def invalidateNonGlobal(virtual: Bool): Unit = { - for ((v, e) <- valid.zip(entry_data)) - when(tag_v === virtual && !e.g) { v := false.B } - } -} + val pmp: Instance[PMPChecker] = Instantiate(new PMPChecker(parameter.pmpCheckerParameter)) + + // io.ptw.customCSRs := DontCare -/** TLB config - * - * @param nSets the 
number of sets of PTE, follow [[ICacheParams.nSets]] - * @param nWays the total number of wayss of PTE, follow [[ICacheParams.nWays]] - * @param nSectors the number of ways in a single PTE TLBEntry - * @param nSuperpageEntries the number of SuperpageEntries - */ -case class TLBConfig( - nSets: Int, - nWays: Int, - nSectors: Int = 4, - nSuperpageEntries: Int = 4) - -/** =Overview= - * [[TLB]] is a TLB template which contains PMA logic and PMP checker. - * - * TLB caches PTE and accelerates the address translation process. - * When tlb miss happens, ask PTW(L2TLB) for Page Table Walk. - * Perform PMP and PMA check during the translation and throw exception if there were any. - * - * ==Cache Structure== - * - Sectored Entry (PTE) - * - set-associative or direct-mapped - * - nsets = [[TLBConfig.nSets]] - * - nways = [[TLBConfig.nWays]] / [[TLBConfig.nSectors]] - * - PTEEntry( sectors = [[TLBConfig.nSectors]] ) - * - LRU(if set-associative) - * - * - Superpage Entry(superpage PTE) - * - fully associative - * - nsets = [[TLBConfig.nSuperpageEntries]] - * - PTEEntry(sectors = 1) - * - PseudoLRU - * - * - Special Entry(PTE across PMP) - * - nsets = 1 - * - PTEEntry(sectors = 1) - * - * ==Address structure== - * {{{ - * |vaddr | - * |ppn/vpn | pgIndex | - * | | | - * | |nSets |nSector | |}}} - * - * ==State Machine== - * {{{ - * s_ready: ready to accept request from CPU. - * s_request: when L1TLB(this) miss, send request to PTW(L2TLB), . - * s_wait: wait for PTW to refill L1TLB. - * s_wait_invalidate: L1TLB is waiting for respond from PTW, but L1TLB will invalidate respond from PTW.}}} - * - * ==PMP== - * pmp check - * - special_entry: always check - * - other entry: check on refill - * - * ==Note== - * PMA consume diplomacy parameter generate physical memory address checking logic - * - * Boom use Rocket ITLB, and its own DTLB. 
- * - * Accelerators:{{{ - * sha3: DTLB - * gemmini: DTLB - * hwacha: DTLB*2+ITLB}}} - * @param instruction true for ITLB, false for DTLB - * @param lgMaxSize @todo seems granularity - * @param cfg [[TLBConfig]] - * @param edge collect SoC metadata. - */ -class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) - extends CoreModule()(p) { - val io = IO(new Bundle { - - /** request from Core */ - val req = Flipped(Decoupled(new TLBReq(lgMaxSize))) - - /** response to Core */ - val resp = Output(new TLBResp()) - - /** SFence Input */ - val sfence = Flipped(Valid(new SFenceReq)) - - /** IO to PTW */ - val ptw = new TLBPTWIO - - /** suppress a TLB refill, one cycle after a miss */ - val kill = Input(Bool()) - }) - io.ptw.customCSRs := DontCare - - val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits) + val pageGranularityPMPs = pmpGranularity >= (1 << parameter.pgIdxBits) val vpn = io.req.bits.vaddr(vaddrBits - 1, pgIdxBits) /** index for sectored_Entry */ - val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) + val memIdx = if (log2Ceil(cfg.nSets) == 0) 0.U else vpn(log2Ceil(cfg.nSectors) + log2Ceil(cfg.nSets) - 1, log2Ceil(cfg.nSectors)) /** TLB Entry */ - val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false)))) + // val superpage: Boolean = false, val superpageOnly: Boolean = false + val sectored_entries = Reg( + Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, pgLevels, vpnBits, ppnBits))) + ) /** Superpage Entry */ - val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true))) + // val superpage: Boolean = true, val superpageOnly: Boolean = true + val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, pgLevels, vpnBits, ppnBits))) /** Special Entry * * If PMP granularity is less than page size, thus need additional "special" entry manage PMP. 
*/ - val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false))) + // val superpage: Boolean = true, val superpageOnly: Boolean = false + val special_entry = Option.when(!pageGranularityPMPs)(Reg(new TLBEntry(1, pgLevels, vpnBits, ppnBits))) def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries def all_entries = ordinary_entries ++ special_entry + def allEntries = + sectored_entries(memIdx).map(tlb => (tlb, false, false)) ++ + superpage_entries.map(tlb => (tlb, true, true)) ++ + special_entry.map(tlb => (tlb, true, false)) + def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) @@ -461,51 +288,59 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T /** refill signal */ val do_refill = usingVM.B && io.ptw.resp.valid + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + /** sfence invalidate refill */ - val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid + val invalidate_refill = isOneOf(state, Seq(s_request /* don't care */, s_wait_invalidate)) || io.sfence.valid // PMP - val mpu_ppn = Mux( + val mpu_ppn = Mux[UInt]( do_refill, refill_ppn, Mux( vm_enabled && special_entry.nonEmpty.B, - special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), + special_entry.map(e => TLBEntry.ppn(e, vpn, TLBEntry.getData(e, vpn), usingVM, pgLevelBits, true, false)).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits ) ) val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */ ), PRV.S.U, Cat(io.ptw.status.debug, priv)) - val pmp = Module(new PMPChecker(lgMaxSize)) pmp.io.addr := mpu_physaddr pmp.io.size := io.req.bits.size pmp.io.pmp := (io.ptw.pmp: Seq[PMP]) pmp.io.prv := mpu_priv // PMA + val pma = Instantiate(new 
PMAChecker(parameter.pmaCheckerParameter)) // check exist a slave can consume this address. - val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_ || _) - // check utility to help check SoC property. - def fastCheck(member: TLManagerParameters => Boolean) = - legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b: Boolean) => b.B) + pma.io.paddr := mpu_physaddr // todo: using DataScratchpad doesn't support cacheable. - val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B - val homogeneous = - TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous + def checkCacheable: Bool = pma.io.resp.cacheable + def checkR: Bool = pma.io.resp.r + def checkW: Bool = pma.io.resp.w + def checkPP: Bool = pma.io.resp.pp + def checkAL: Bool = pma.io.resp.al + def checkAA: Bool = pma.io.resp.aa + def checkX: Bool = pma.io.resp.x + def checkEFF: Bool = pma.io.resp.eff + // In M mode, if access DM address(debug module program buffer) - val deny_access_to_debug = - mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) - val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r - val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w - val prot_pp = fastCheck(_.supportsPutPartial) - val prot_al = fastCheck(_.supportsLogical) - val prot_aa = fastCheck(_.supportsArithmetic) - val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x - val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType) + // @todo val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous + val homogeneous = true.B + // val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) + val deny_access_to_debug: Bool 
= false.B + val cacheable: Bool = checkCacheable && (instruction || !usingDataScratchpad).B + val prot_r: Bool = checkR && !deny_access_to_debug && pmp.io.r + val prot_w: Bool = checkW && !deny_access_to_debug && pmp.io.w + val prot_pp: Bool = checkPP + val prot_al: Bool = checkAL + val prot_aa: Bool = checkAA + val prot_x: Bool = checkX && !deny_access_to_debug && pmp.io.x + val prot_eff: Bool = checkEFF // hit check - val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v)) - val superpage_hits = superpage_entries.map(_.hit(vpn, priv_v)) - val hitsVec = all_entries.map(vm_enabled && _.hit(vpn, priv_v)) + val sector_hits = sectored_entries(memIdx).map(tlbEntry => TLBEntry.sectorHit(tlbEntry, vpn, priv_v)) + val superpage_hits = superpage_entries.map(tlbEntry => TLBEntry.hit(tlbEntry, vpn, priv_v, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = true)) + val hitsVec = VecInit(allEntries.map{case (tlbEntry, superpage, superpageOnly) => vm_enabled && TLBEntry.hit(tlbEntry, vpn, priv_v, usingVM: Boolean, pgLevelBits: Int, hypervisorExtraAddrBits: Int, superpage, superpageOnly)}) val real_hits = hitsVec.asUInt val hits = Cat(!vm_enabled, real_hits) @@ -514,7 +349,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T when(do_refill) { val pte = io.ptw.resp.bits.pte val refill_v = r_vstage1_en || r_stage2_en - val newEntry = Wire(new TLBEntryData) + val newEntry = Wire(new TLBEntryData(ppnBits)) newEntry.ppn := pte.ppn newEntry.c := cacheable newEntry.u := pte.u @@ -527,9 +362,9 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T newEntry.hr := io.ptw.resp.bits.hr newEntry.hw := io.ptw.resp.bits.hw newEntry.hx := io.ptw.resp.bits.hx - newEntry.sr := pte.sr() - newEntry.sw := pte.sw() - newEntry.sx := pte.sx() + newEntry.sr := PTE.sr(pte) + newEntry.sw := PTE.sw(pte) + newEntry.sx := PTE.sx(pte) newEntry.pr := prot_r newEntry.pw := prot_w newEntry.px := prot_x 
@@ -540,21 +375,23 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T newEntry.fragmented_superpage := io.ptw.resp.bits.fragmented_superpage // refill special_entry when(special_entry.nonEmpty.B && !io.ptw.resp.bits.homogeneous) { - special_entry.foreach(_.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry)) + special_entry.foreach(tlbEntry => TLBEntry.insert(tlbEntry, r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry, superpageOnly = false)) }.elsewhen(io.ptw.resp.bits.level < (pgLevels - 1).U) { val waddr = Mux(r_superpage_hit.valid && usingHypervisor.B, r_superpage_hit.bits, r_superpage_repl_addr) for ((e, i) <- superpage_entries.zipWithIndex) when(r_superpage_repl_addr === i.U) { - e.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry) - when(invalidate_refill) { e.invalidate() } + TLBEntry.insert(e, r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry, superpageOnly = true) + when(invalidate_refill) { + TLBEntry.invalidate(e) + } } // refill sectored_hit }.otherwise { - val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) + val r_memIdx = if(log2Ceil(cfg.nSets) == 0) 0.U else (r_refill_tag(log2Ceil(cfg.nSectors) + log2Ceil(cfg.nSets) - 1, log2Ceil(cfg.nSectors))) val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr) for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when(waddr === i.U) { - when(!r_sectored_hit.valid) { e.invalidate() } - e.insert(r_refill_tag, refill_v, 0.U, newEntry) - when(invalidate_refill) { e.invalidate() } + when(!r_sectored_hit.valid) { TLBEntry.invalidate(e) } + TLBEntry.insert(e, r_refill_tag, refill_v, 0.U, newEntry, superpageOnly = false) + when(invalidate_refill) { TLBEntry.invalidate(e) } } } @@ -564,75 +401,75 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T } // get all entries data. 
- val entries = all_entries.map(_.getData(vpn)) + val entries = all_entries.map(tlbEntry => TLBEntry.getData(tlbEntry, vpn)) val normal_entries = entries.take(ordinary_entries.size) // parallel query PPN from [[all_entries]], if VM not enabled return VPN instead val ppn = Mux1H( hitsVec :+ !vm_enabled, - (all_entries.zip(entries)).map { case (entry, data) => entry.ppn(vpn, data) } :+ vpn(ppnBits - 1, 0) + allEntries.zip(entries).map { case ((entry, superpage, superpageOnly), data) => TLBEntry.ppn(entry, vpn, data, usingVM, pgLevelBits: Int, superpage, superpageOnly) } :+ vpn(ppnBits - 1, 0) ) val nPhysicalEntries = 1 + special_entry.size // generally PTW misaligned load exception. - val ptw_ae_array = Cat(false.B, entries.map(_.ae_ptw).asUInt) - val final_ae_array = Cat(false.B, entries.map(_.ae_final).asUInt) - val ptw_pf_array = Cat(false.B, entries.map(_.pf).asUInt) - val ptw_gf_array = Cat(false.B, entries.map(_.gf).asUInt) + val ptw_ae_array = Cat(false.B, VecInit(entries.map(_.ae_ptw)).asUInt) + val final_ae_array = Cat(false.B, VecInit(entries.map(_.ae_final)).asUInt) + val ptw_pf_array = Cat(false.B, VecInit(entries.map(_.pf)).asUInt) + val ptw_gf_array = Cat(false.B, VecInit(entries.map(_.gf)).asUInt) val sum = Mux(priv_v, io.ptw.gstatus.sum, io.ptw.status.sum) // if in hypervisor/machine mode, cannot read/write user entries. // if in superviosr/user mode, "If the SUM bit in the sstatus register is set, supervisor mode software may also access pages with U=1.(from spec)" - val priv_rw_ok = Mux(!priv_s || sum, entries.map(_.u).asUInt, 0.U) | Mux(priv_s, ~entries.map(_.u).asUInt, 0.U) + val priv_rw_ok = Mux(!priv_s || sum, VecInit(entries.map(_.u)).asUInt, 0.U) | Mux(priv_s, ~VecInit(entries.map(_.u)).asUInt, 0.U) // if in hypervisor/machine mode, other than user pages, all pages are executable. // if in superviosr/user mode, only user page can execute. 
- val priv_x_ok = Mux(priv_s, ~entries.map(_.u).asUInt, entries.map(_.u).asUInt) + val priv_x_ok = Mux(priv_s, ~VecInit(entries.map(_.u)).asUInt, VecInit(entries.map(_.u)).asUInt) val stage1_bypass = - Fill(entries.size, usingHypervisor.B) & (Fill(entries.size, !stage1_en) | entries.map(_.ae_stage2).asUInt) + Fill(entries.size, usingHypervisor.B) & (Fill(entries.size, !stage1_en) | VecInit(entries.map(_.ae_stage2)).asUInt) val mxr = io.ptw.status.mxr | Mux(priv_v, io.ptw.gstatus.mxr, false.B) // "The vsstatus field MXR, which makes execute-only pages readable, only overrides VS-stage page protection.(from spec)" val r_array = - Cat(true.B, (priv_rw_ok & (entries.map(_.sr).asUInt | Mux(mxr, entries.map(_.sx).asUInt, 0.U))) | stage1_bypass) - val w_array = Cat(true.B, (priv_rw_ok & entries.map(_.sw).asUInt) | stage1_bypass) - val x_array = Cat(true.B, (priv_x_ok & entries.map(_.sx).asUInt) | stage1_bypass) + Cat(true.B, (priv_rw_ok & (VecInit(entries.map(_.sr)).asUInt | Mux(mxr, VecInit(entries.map(_.sx)).asUInt, 0.U))) | stage1_bypass) + val w_array = Cat(true.B, (priv_rw_ok & VecInit(entries.map(_.sw)).asUInt) | stage1_bypass) + val x_array = Cat(true.B, (priv_x_ok & VecInit(entries.map(_.sx)).asUInt) | stage1_bypass) val stage2_bypass = Fill(entries.size, !stage2_en) val hr_array = - Cat(true.B, entries.map(_.hr).asUInt | Mux(io.ptw.status.mxr, entries.map(_.hx).asUInt, 0.U) | stage2_bypass) - val hw_array = Cat(true.B, entries.map(_.hw).asUInt | stage2_bypass) - val hx_array = Cat(true.B, entries.map(_.hx).asUInt | stage2_bypass) + Cat(true.B, VecInit(entries.map(_.hr)).asUInt | Mux(io.ptw.status.mxr, VecInit(entries.map(_.hx)).asUInt, 0.U) | stage2_bypass) + val hw_array = Cat(true.B, VecInit(entries.map(_.hw)).asUInt | stage2_bypass) + val hx_array = Cat(true.B, VecInit(entries.map(_.hx)).asUInt | stage2_bypass) // These array is for each TLB entries. 
// user mode can read: PMA OK, TLB OK, AE OK - val pr_array = Cat(Fill(nPhysicalEntries, prot_r), normal_entries.map(_.pr).asUInt) & ~(ptw_ae_array | final_ae_array) + val pr_array = Cat(Fill(nPhysicalEntries, prot_r), VecInit(normal_entries.map(_.pr)).asUInt) & ~(ptw_ae_array | final_ae_array) // user mode can write: PMA OK, TLB OK, AE OK - val pw_array = Cat(Fill(nPhysicalEntries, prot_w), normal_entries.map(_.pw).asUInt) & ~(ptw_ae_array | final_ae_array) + val pw_array = Cat(Fill(nPhysicalEntries, prot_w), VecInit(normal_entries.map(_.pw)).asUInt) & ~(ptw_ae_array | final_ae_array) // user mode can write: PMA OK, TLB OK, AE OK - val px_array = Cat(Fill(nPhysicalEntries, prot_x), normal_entries.map(_.px).asUInt) & ~(ptw_ae_array | final_ae_array) + val px_array = Cat(Fill(nPhysicalEntries, prot_x), VecInit(normal_entries.map(_.px)).asUInt) & ~(ptw_ae_array | final_ae_array) // put effect - val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt) + val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), VecInit(normal_entries.map(_.eff)).asUInt) // cacheable - val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt) + val c_array = Cat(Fill(nPhysicalEntries, cacheable), VecInit(normal_entries.map(_.c)).asUInt) // put partial - val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt) + val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), VecInit(normal_entries.map(_.ppp)).asUInt) // atomic arithmetic - val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), normal_entries.map(_.paa).asUInt) + val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), VecInit(normal_entries.map(_.paa)).asUInt) // atomic logic - val pal_array = Cat(Fill(nPhysicalEntries, prot_al), normal_entries.map(_.pal).asUInt) + val pal_array = Cat(Fill(nPhysicalEntries, prot_al), VecInit(normal_entries.map(_.pal)).asUInt) val ppp_array_if_cached = ppp_array | c_array val paa_array_if_cached = paa_array | (if 
(usingAtomicsInCache) c_array else 0.U) val pal_array_if_cached = pal_array | (if (usingAtomicsInCache) c_array else 0.U) - val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries - 1), normal_entries.map(_.c).asUInt) + val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries - 1), VecInit(normal_entries.map(_.c)).asUInt) // vaddr misaligned: vaddr[1:0]=b00 val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR def badVA(guestPA: Boolean): Bool = { - val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels + val additionalPgLevels = PTBR.additionalPgLevels(if (guestPA) io.ptw.hgatp else satp, pgLevels, minPgLevels) val extraBits = if (guestPA) hypervisorExtraAddrBits else 0 val signed = !guestPA val nPgLevelChoices = pgLevels - minPgLevels + 1 val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits - (for (i <- 0 until nPgLevelChoices) yield { - val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U + VecInit((for (i <- 0 until nPgLevelChoices) yield { + val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - (if(signed) 1 else 0)))).U val maskedVAddr = io.req.bits.vaddr & mask additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) - }).orR + })).asUInt.orR } val bad_gpa = if (!usingHypervisor) false.B @@ -641,15 +478,20 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B else vm_enabled && stage1_en && badVA(false) - val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC) + val cmd_lrsc = usingAtomics.B && isOneOf(io.req.bits.cmd, Seq(M_XLR, M_XSC)) + def isAMOLogical(cmd: UInt) = isOneOf(cmd, Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)) val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd) 
+ def isAMOArithmetic(cmd: UInt) = isOneOf(cmd, Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)) val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd) val cmd_put_partial = io.req.bits.cmd === M_PWR + def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) + def isRead(cmd: UInt) = isOneOf(cmd, Seq(M_XRD, M_HLVX, M_XLR, M_XSC)) || isAMO(cmd) val cmd_read = isRead(io.req.bits.cmd) val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd) val cmd_write = isWrite(io.req.bits.cmd) val cmd_write_perms = cmd_write || - io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions + isOneOf(io.req.bits.cmd, Seq(M_FLUSH_ALL, M_WOK)) // not a write, but needs write permissions val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array) val ae_array = @@ -691,8 +533,8 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val superpage_plru = new PseudoLRU(superpage_entries.size) when(io.req.valid && vm_enabled) { // replace - when(sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) } - when(superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } + when(VecInit(sector_hits).asUInt.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) } + when(VecInit(superpage_hits).asUInt.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } } // Superpages create the possibility that two entries in the TLB may match. 
@@ -722,9 +564,12 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T io.resp.ma.inst := false.B // this is up to the pipeline to figure out io.resp.cacheable := (c_array & hits).orR io.resp.must_alloc := (must_alloc_array & hits).orR - io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers - .forall(m => !m.supportsAcquireB || m.supportsHint) - .B + + // io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers + // .forall(m => !m.supportsAcquireB || m.supportsHint) + // .B + // prefetch range + io.resp.prefetchable := (prefetchable_array & hits).orR io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits - 1, 0)) io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte @@ -759,9 +604,9 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T r_stage2_en := stage2_en r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way) r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx)) - r_sectored_hit.valid := sector_hits.orR + r_sectored_hit.valid := VecInit(sector_hits).asUInt.orR r_sectored_hit.bits := OHToUInt(sector_hits) - r_superpage_hit.valid := superpage_hits.orR + r_superpage_hit.valid := VecInit(superpage_hits).asUInt.orR r_superpage_hit.bits := OHToUInt(superpage_hits) } // Handle SFENCE.VMA when send request to PTW. @@ -792,35 +637,33 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T // SFENCE processing logic. 
when(sfence) { assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn) - for (e <- all_real_entries) { - val hv = usingHypervisor.B && io.sfence.bits.hv - val hg = usingHypervisor.B && io.sfence.bits.hg - when(!hg && io.sfence.bits.rs1) { e.invalidateVPN(vpn, hv) } - .elsewhen(!hg && io.sfence.bits.rs2) { e.invalidateNonGlobal(hv) } - .otherwise { e.invalidate(hv || hg) } + val hv = usingHypervisor.B && io.sfence.bits.hv + val hg = usingHypervisor.B && io.sfence.bits.hg + sectored_entries.flatten.foreach{ e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = false, superpageOnly = false) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } + } + superpage_entries.foreach { e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = true) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } + } + special_entry.foreach { e => + when(!hg && io.sfence.bits.rs1) { TLBEntry.invalidateVPN(e, vpn, hv, usingVM, pgLevelBits, hypervisorExtraAddrBits, superpage = true, superpageOnly = false) } + .elsewhen(!hg && io.sfence.bits.rs2) { TLBEntry.invalidateNonGlobal(e, hv) } + .otherwise { TLBEntry.invalidateNonGlobal(e, hv || hg) } } } when(io.req.fire && vsatp_mode_mismatch) { - all_real_entries.foreach(_.invalidate(true.B)) + all_real_entries.foreach(tlbEntry => TLBEntry.invalidate(tlbEntry, true.B)) v_entries_use_stage1 := vstage1_en } - when(multipleHits || reset.asBool) { - all_real_entries.foreach(_.invalidate()) + when(multipleHits || io.reset.asBool) { + all_real_entries.foreach(tlbEntry => TLBEntry.invalidate(tlbEntry)) } - - ccover(io.ptw.req.fire, "MISS", "TLB miss") - ccover(io.ptw.req.valid && 
!io.ptw.req.ready, "PTW_STALL", "TLB miss, but PTW busy") - ccover(state === s_wait_invalidate, "SFENCE_DURING_REFILL", "flush TLB during TLB refill") - ccover(sfence && !io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_ALL", "flush TLB") - ccover(sfence && !io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_ASID", "flush TLB ASID") - ccover(sfence && io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_LINE", "flush TLB line") - ccover(sfence && io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_LINE_ASID", "flush TLB line/ASID") - ccover(multipleHits, "MULTIPLE_HITS", "Two matching translations in TLB") } - def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc) - /** Decides which entry to be replaced * * If there is a invalid entry, replace it with priorityencoder; @@ -829,7 +672,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T * @return mask for TLBEntry replacement */ def replacementEntry(set: Seq[TLBEntry], alt: UInt) = { - val valids = set.map(_.valid.orR).asUInt + val valids = VecInit(set.map(_.valid.asUInt.orR)).asUInt Mux(valids.andR, alt, PriorityEncoder(~valids)) } } From cba7125d6c57f736da970c0ab2afb4cd0610a991 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 16:37:11 +0800 Subject: [PATCH 069/140] [rocketv] add elaborator for TLB - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.TLB config --useAsyncReset true --xLen 32 --nSets 1 --nWays 32 --nSectors 4 --nSuperpageEntries 4 --asidBits 0 --pgLevels 2 --usingHypervisor false --usingAtomics true --usingDataScratchpad false --usingAtomicsOnlyForIO false --usingVM false --usingAtomicsInCache false --nPMPs 8 --paddrBits 32 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --exec 80000000-ffffffff 
--sideEffects 00000000-3fffffff --isITLB true - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.TLB design --parameter ./TLB.json --run-firtool --- elaborator/src/rocketv/TLB.scala | 111 +++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 elaborator/src/rocketv/TLB.scala diff --git a/elaborator/src/rocketv/TLB.scala b/elaborator/src/rocketv/TLB.scala new file mode 100644 index 000000000..1ee1ff458 --- /dev/null +++ b/elaborator/src/rocketv/TLB.scala @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{TLB, TLBParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object TLB extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class TLBParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "nSets") nSets: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "nSectors") nSectors: Int, + @arg(name = "nSuperpageEntries") nSuperpageEntries: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = 
"usingHypervisor") usingHypervisor: Boolean, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDataScratchpad") usingDataScratchpad: Boolean, + @arg(name = "usingAtomicsOnlyForIO") usingAtomicsOnlyForIO: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingAtomicsInCache") usingAtomicsInCache: Boolean, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "legal") legal: Seq[BitSet], + @arg(name = "cacheable") cacheable: Seq[BitSet], + @arg(name = "read") read: Seq[BitSet], + @arg(name = "write") write: Seq[BitSet], + @arg(name = "putPartial") putPartial: Seq[BitSet], + @arg(name = "logic") logic: Seq[BitSet], + @arg(name = "arithmetic") arithmetic: Seq[BitSet], + @arg(name = "exec") exec: Seq[BitSet], + @arg(name = "sideEffects") sideEffects: Seq[BitSet], + @arg(name = "isITLB") isITLB: Boolean) { + def convert: TLBParameter = TLBParameter( + useAsyncReset, + xLen, + nSets, + nWays, + nSectors, + nSuperpageEntries, + asidBits, + pgLevels, + usingHypervisor, + usingAtomics, + usingDataScratchpad, + usingAtomicsOnlyForIO, + usingVM, + usingAtomicsInCache, + nPMPs, + PMAChecker.PMACheckerParameterMain( + paddrBits, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects + ).convert, + paddrBits, + isITLB + ) + } + + implicit def TLBParameterMainParser: ParserForClass[TLBParameterMain] = ParserForClass[TLBParameterMain] + + @main + def config(@arg(name = "parameter") parameter: TLBParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[TLB, TLBParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From d2b11c387bb16e85887840f1b2743c45817f256f Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 1 Jul 2024 12:20:35 +0800 Subject: [PATCH 070/140] [rocketv] 
copy DCache into rocketv project --- rocketv/src/HellaCache.scala | 318 ++++++++++++++++++++++++++++ rocketv/src/HellaCacheArbiter.scala | 84 ++++++++ 2 files changed, 402 insertions(+) create mode 100644 rocketv/src/HellaCache.scala create mode 100644 rocketv/src/HellaCacheArbiter.scala diff --git a/rocketv/src/HellaCache.scala b/rocketv/src/HellaCache.scala new file mode 100644 index 000000000..77df10941 --- /dev/null +++ b/rocketv/src/HellaCache.scala @@ -0,0 +1,318 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +import chisel3._ +import chisel3.util.{log2Ceil, log2Up, Decoupled, Valid} +import freechips.rocketchip.amba._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util._ +import org.chipsalliance.cde.config.{Field, Parameters} + +import scala.collection.mutable.ListBuffer + +trait HasL1HellaCacheParameters extends HasL1CacheParameters with HasCoreParameters { + val cacheParams = tileParams.dcache.get + val cfg = cacheParams + + def wordBits = coreDataBits + def wordBytes = coreDataBytes + def subWordBits = cacheParams.subWordBits.getOrElse(wordBits) + def subWordBytes = subWordBits / 8 + def wordOffBits = log2Up(wordBytes) + def beatBytes = cacheBlockBytes / cacheDataBeats + def beatWords = beatBytes / wordBytes + def beatOffBits = log2Up(beatBytes) + def idxMSB = untagBits - 1 + def idxLSB = blockOffBits + def offsetmsb = idxLSB - 1 + def offsetlsb = wordOffBits + def rowWords = rowBits / wordBits + def doNarrowRead = coreDataBits * nWays % rowBits == 0 + def eccBytes = cacheParams.dataECCBytes + val eccBits = cacheParams.dataECCBytes * 8 + val encBits = cacheParams.dataCode.width(eccBits) + val encWordBits = encBits * (wordBits / eccBits) + def encDataBits = cacheParams.dataCode.width(coreDataBits) // NBDCache only + def encRowBits = encDataBits * rowWords + def lrscCycles = coreParams.lrscCycles // ISA requires 
16-insn LRSC sequences to succeed + def lrscBackoff = 3 // disallow LRSC reacquisition briefly + def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant + def nIOMSHRs = cacheParams.nMMIOs + def maxUncachedInFlight = cacheParams.nMMIOs + def dataScratchpadSize = cacheParams.dataScratchpadBytes + + require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)") + if (!usingDataScratchpad) + require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)") + // would need offset addr for puts if data width < xlen + require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)") +} + +abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module with HasL1HellaCacheParameters + +abstract class L1HellaCacheBundle(implicit val p: Parameters) + extends ParameterizedBundle()(p) + with HasL1HellaCacheParameters + +/** Bundle definitions for HellaCache interfaces */ + +trait HasCoreMemOp extends HasL1HellaCacheParameters { + val addr = UInt(coreMaxAddrBits.W) + val idx = (usingVM && untagBits > pgIdxBits).option(UInt(coreMaxAddrBits.W)) + val tag = UInt((coreParams.dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) + val cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W) + val signed = Bool() + val dprv = UInt(PRV.SZ.W) + val dv = Bool() +} + +trait HasCoreData extends HasCoreParameters { + val data = UInt(coreDataBits.W) + val mask = UInt(coreDataBytes.W) +} + +class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp { + val phys = Bool() + val no_alloc = Bool() + val no_xcpt = Bool() +} + +class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData + +class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp with HasCoreData { + val replay = Bool() + val has_data = Bool() + val data_word_bypass = UInt(coreDataBits.W) + val data_raw = 
UInt(coreDataBits.W) + val store_data = UInt(coreDataBits.W) +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions + val gf = new AlignmentExceptions + val ae = new AlignmentExceptions +} + +class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) with HasCoreData + +class HellaCachePerfEvents extends Bundle { + val acquire = Bool() + val release = Bool() + val grant = Bool() + val tlbMiss = Bool() + val blocked = Bool() + val canAcceptStoreThenLoad = Bool() + val canAcceptStoreThenRMW = Bool() + val canAcceptLoadThenLoad = Bool() + val storeBufferEmptyAfterLoad = Bool() + val storeBufferEmptyAfterStore = Bool() +} + +// interface between D$ and processor/DTLB +class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { + val req = Decoupled(new HellaCacheReq) + val s1_kill = Output(Bool()) // kill previous cycle's req + val s1_data = Output(new HellaCacheWriteData()) // data for previous cycle's req + val s2_nack = Input(Bool()) // req from two cycles ago is rejected + val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) + val s2_kill = Output(Bool()) // kill req from two cycles ago + val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO + val s2_paddr = Input(UInt(paddrBits.W)) // translated address + + val resp = Flipped(Valid(new HellaCacheResp)) + val replay_next = Input(Bool()) + val s2_xcpt = Input(new HellaCacheExceptions) + val s2_gpa = Input(UInt(vaddrBitsExtended.W)) + val s2_gpa_is_pte = Input(Bool()) + val uncached_resp = tileParams.dcache.get.separateUncachedResp.option(Flipped(Decoupled(new HellaCacheResp))) + val ordered = Input(Bool()) + val perf = Input(new HellaCachePerfEvents()) + + val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? 
+ val clock_enabled = Input(Bool()) // is D$ currently being clocked? +} + +/** Base classes for Diplomatic TL2 HellaCaches */ + +abstract class HellaCache()(implicit p: Parameters) + extends LazyModule + with HasNonDiplomaticTileParameters { + protected val cfg = tileParams.dcache.get + + protected def cacheClientParameters = cfg.scratch + .map(x => Seq()) + .getOrElse( + Seq( + TLMasterParameters.v1( + name = s"Core DCache", + sourceId = IdRange(0, 1.max(cfg.nMSHRs)), + supportsProbe = TransferSizes(cfg.blockBytes, cfg.blockBytes) + ) + ) + ) + + protected def mmioClientParameters = Seq( + TLMasterParameters.v1( + name = s"Core DCache MMIO", + sourceId = IdRange(firstMMIO, firstMMIO + cfg.nMMIOs), + requestFifo = true + ) + ) + + def firstMMIO = (cacheClientParameters.map(_.sourceId.end) :+ 0).max + + val node = TLClientNode( + Seq( + TLMasterPortParameters.v1( + clients = cacheClientParameters ++ mmioClientParameters, + minLatency = 1, + requestFields = tileParams.core.useVM.option(Seq()).getOrElse(Seq(AMBAProtField())) + ) + ) + ) + + val hartIdSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) + val mmioAddressPrefixSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) + + val module: HellaCacheModule + + def flushOnFenceI = cfg.scratch.isEmpty && !node.edges + .out(0) + .manager + .managers + .forall(m => + !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED || m.regionType <= RegionType.IDEMPOTENT + ) + + def canSupportCFlushLine = !usingVM || cfg.blockBytes * cfg.nSets <= (1 << pgIdxBits) + + require(!tileParams.core.haveCFlush || cfg.scratch.isEmpty, "CFLUSH_D_L1 instruction requires a D$") +} + +class HellaCacheBundle(val outer: HellaCache)(implicit p: Parameters) extends CoreBundle()(p) { + val cpu = Flipped((new HellaCacheIO)) + val ptw = new TLBPTWIO() + val errors = new DCacheErrors +} + +class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer) with HasL1HellaCacheParameters { + implicit val edge = 
outer.node.edges.out(0) + val (tl_out, _) = outer.node.out(0) + val io = IO(new HellaCacheBundle(outer)) + val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle) + val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle) + dontTouch(io.cpu.resp) // Users like to monitor these fields even if the core ignores some signals + dontTouch(io.cpu.s1_data) + + require(rowBits == edge.bundle.dataBits) + + private val fifoManagers = edge.manager.managers.filter(TLFIFOFixer.allVolatile) + fifoManagers.foreach { m => + require( + m.fifoId == fifoManagers.head.fifoId, + s"IOMSHRs must be FIFO for all regions with effects, but HellaCache sees\n" + + s"${m.nodePath.map(_.name)}\nversus\n${fifoManagers.head.nodePath.map(_.name)}" + ) + } +} + +/** Support overriding which HellaCache is instantiated */ + +case object BuildHellaCache extends Field[BaseTile => Parameters => HellaCache](HellaCacheFactory.apply) + +object HellaCacheFactory { + def apply(tile: BaseTile)(p: Parameters): HellaCache = { + assert(tile.tileParams.dcache.get.nMSHRs == 0) + new DCache(tile.crossing)(p) + } +} + +/** Mix-ins for constructing tiles that have a HellaCache */ + +trait HasHellaCache { this: BaseTile => + val module: HasHellaCacheModule + implicit val p: Parameters + var nDCachePorts = 0 + lazy val dcache: HellaCache = LazyModule(p(BuildHellaCache)(this)(p)) + + tlMasterXbar.node := TLWidthWidget(tileParams.dcache.get.rowBits / 8) := dcache.node + dcache.hartIdSinkNodeOpt.map { _ := hartIdNexusNode } + dcache.mmioAddressPrefixSinkNodeOpt.map { _ := mmioAddressPrefixNexusNode } + InModuleBody { + dcache.module match { + case module: DCacheModule => module.tlb_port := DontCare + case other => other + } + } +} + +trait HasHellaCacheModule { + val outer: HasHellaCache with HasTileParameters + implicit val p: Parameters + val dcachePorts = ListBuffer[HellaCacheIO]() + val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.p)) + outer.dcache.module.io.cpu <> 
dcacheArb.io.mem +} + +/** Metadata array used for all HellaCaches */ + +class L1Metadata(implicit p: Parameters) extends L1HellaCacheBundle()(p) { + val coh = new ClientMetadata + val tag = UInt(tagBits.W) +} + +object L1Metadata { + def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { + val meta = Wire(new L1Metadata) + meta.tag := tag + meta.coh := coh + meta + } +} + +class L1MetaReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { + val idx = UInt(idxBits.W) + val way_en = UInt(nWays.W) + val tag = UInt(tagBits.W) +} + +class L1MetaWriteReq(implicit p: Parameters) extends L1MetaReadReq()(p) { + val data = new L1Metadata +} + +class L1MetadataArray[T <: L1Metadata](onReset: () => T)(implicit p: Parameters) extends L1HellaCacheModule()(p) { + val rstVal = onReset() + val io = IO(new Bundle { + val read = Flipped(Decoupled(new L1MetaReadReq)) + val write = Flipped(Decoupled(new L1MetaWriteReq)) + val resp = Output(Vec(nWays, rstVal.cloneType)) + }) + + val rst_cnt = RegInit(0.U(log2Up(nSets + 1).W)) + val rst = rst_cnt < nSets.U + val waddr = Mux(rst, rst_cnt, io.write.bits.idx) + val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt + val wmask = Mux(rst || (nWays == 1).B, (-1).S, io.write.bits.way_en.asSInt).asBools + val rmask = Mux(rst || (nWays == 1).B, (-1).S, io.read.bits.way_en.asSInt).asBools + when(rst) { rst_cnt := rst_cnt + 1.U } + + val metabits = rstVal.getWidth + val tag_array = SyncReadMem(nSets, Vec(nWays, UInt(metabits.W))) + val wen = rst || io.write.valid + when(wen) { + tag_array.write(waddr, VecInit.fill(nWays)(wdata), wmask) + } + io.resp := tag_array.read(io.read.bits.idx, io.read.fire).map(_.asTypeOf(chiselTypeOf(rstVal))) + + io.read.ready := !wen // so really this could be a 6T RAM + io.write.ready := !rst +} diff --git a/rocketv/src/HellaCacheArbiter.scala b/rocketv/src/HellaCacheArbiter.scala new file mode 100644 index 000000000..f4d60eeca --- /dev/null +++ b/rocketv/src/HellaCacheArbiter.scala @@ -0,0 
+1,84 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +import chisel3._ +import chisel3.util.{Cat, log2Up} + +class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { + val io = IO(new Bundle { + val requestor = Flipped(Vec(n, new HellaCacheIO)) + val mem = new HellaCacheIO + }) + + if (n == 1) { + io.mem <> io.requestor.head + } else { + val s1_id = Reg(UInt()) + val s2_id = RegNext(s1_id) + + io.mem.keep_clock_enabled := io.requestor.map(_.keep_clock_enabled).reduce(_ || _) + + io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_ || _) + io.requestor(0).req.ready := io.mem.req.ready + for (i <- 1 until n) + io.requestor(i).req.ready := io.requestor(i - 1).req.ready && !io.requestor(i - 1).req.valid + + for (i <- n - 1 to 0 by -1) { + val req = io.requestor(i).req + def connect_s0() = { + io.mem.req.bits := req.bits + io.mem.req.bits.tag := Cat(req.bits.tag, i.U(log2Up(n).W)) + s1_id := i.U + } + def connect_s1() = { + io.mem.s1_kill := io.requestor(i).s1_kill + io.mem.s1_data := io.requestor(i).s1_data + } + def connect_s2() = { + io.mem.s2_kill := io.requestor(i).s2_kill + } + + if (i == n - 1) { + connect_s0() + connect_s1() + connect_s2() + } else { + when(req.valid) { connect_s0() } + when(s1_id === i.U) { connect_s1() } + when(s2_id === i.U) { connect_s2() } + } + } + + io.mem.uncached_resp.foreach(_.ready := false.B) + + for (i <- 0 until n) { + val resp = io.requestor(i).resp + val tag_hit = io.mem.resp.bits.tag(log2Up(n) - 1, 0) === i.U + resp.valid := io.mem.resp.valid && tag_hit + io.requestor(i).s2_xcpt := io.mem.s2_xcpt + io.requestor(i).s2_gpa := io.mem.s2_gpa + io.requestor(i).s2_gpa_is_pte := io.mem.s2_gpa_is_pte + io.requestor(i).ordered := io.mem.ordered + io.requestor(i).perf := io.mem.perf + io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === i.U + io.requestor(i).s2_nack_cause_raw := io.mem.s2_nack_cause_raw + io.requestor(i).s2_uncached := io.mem.s2_uncached + 
io.requestor(i).s2_paddr := io.mem.s2_paddr + io.requestor(i).clock_enabled := io.mem.clock_enabled + resp.bits := io.mem.resp.bits + resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) + + io.requestor(i).replay_next := io.mem.replay_next + + io.requestor(i).uncached_resp.map { uncached_resp => + val uncached_tag_hit = io.mem.uncached_resp.get.bits.tag(log2Up(n) - 1, 0) === i.U + uncached_resp.valid := io.mem.uncached_resp.get.valid && uncached_tag_hit + when(uncached_resp.ready && uncached_tag_hit) { + io.mem.uncached_resp.get.ready := true.B + } + uncached_resp.bits := io.mem.uncached_resp.get.bits + uncached_resp.bits.tag := io.mem.uncached_resp.get.bits.tag >> log2Up(n) + } + } + } +} From 1f2646f6a126370295f544cbff848c4c117ec974 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 1 Jul 2024 18:49:54 +0800 Subject: [PATCH 071/140] [rocketv] migrate DCache --- rocketv/src/Bundle.scala | 53 +- rocketv/src/HellaCache.scala | 1892 +++++++++++++++++++++++---- rocketv/src/HellaCacheArbiter.scala | 132 +- 3 files changed, 1798 insertions(+), 279 deletions(-) diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index 48a7fab46..c60156caf 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -5,7 +5,7 @@ package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{Cat, Decoupled, Valid, isPow2, log2Ceil} +import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, isPow2, log2Ceil} // This file defines Bundle shared in the project. // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. 
@@ -1314,3 +1314,54 @@ class TLBEntryData(ppnBits: Int) extends Bundle { /** fragmented_superpage support */ val fragmented_superpage = Bool() } + +class DCacheErrors(hasCorrectable: Boolean, hasUncorrectable: Boolean, paddrBits: Int) extends Bundle { + val correctable: Option[Valid[UInt]] = Option.when(hasCorrectable)(Valid(UInt(paddrBits.W))) + val uncorrectable: Option[Valid[UInt]] = Option.when(hasUncorrectable)(Valid(UInt(paddrBits.W))) + val bus: Valid[UInt] = Valid(UInt(paddrBits.W)) +} + +class DCacheTLBPort(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle { + val req: DecoupledIO[TLBReq] = Flipped(Decoupled(new TLBReq(paddrBits, vaddrBitsExtended))) + val s1_resp: TLBResp = Output(new TLBResp(paddrBits, vaddrBitsExtended)) + val s2_kill: Bool = Input(Bool()) +} + +object ClientStates { + val width = 2 + + def Nothing = 0.U(width.W) + def Branch = 1.U(width.W) + def Trunk = 2.U(width.W) + def Dirty = 3.U(width.W) + + def hasReadPermission(state: UInt): Bool = state > Nothing + def hasWritePermission(state: UInt): Bool = state > Branch +} + +class ClientMetadata extends Bundle { + /** Actual state information stored in this bundle */ + val state = UInt(ClientStates.width.W) +} + +class L1Metadata(tagBits: Int) extends Bundle { + val coh = new ClientMetadata + val tag = UInt(tagBits.W) +} + +class DCacheMetadataReq(vaddrBitsExtended: Int, idxBits: Int, nWays: Int, dataWidth: Int) extends Bundle { + val write: Bool = Bool() + val addr: UInt = UInt(vaddrBitsExtended.W) + val idx: UInt = UInt(idxBits.W) + val way_en: UInt = UInt(nWays.W) + val data: UInt = UInt(dataWidth.W) +} + +class DCacheDataReq(untagBits: Int, encBits: Int, rowBytes: Int, eccBytes: Int, subWordBytes: Int, wordBytes: Int, nWays: Int) extends Bundle { + val addr: UInt = UInt(untagBits.W) + val write: Bool = Bool() + val wdata: UInt = UInt((encBits * rowBytes / eccBytes).W) + val wordMask: UInt = UInt((rowBytes / subWordBytes).W) + val eccMask: UInt = UInt((wordBytes / eccBytes).W) + val 
way_en: UInt = UInt(nWays.W) +} diff --git a/rocketv/src/HellaCache.scala b/rocketv/src/HellaCache.scala index 77df10941..5574a0940 100644 --- a/rocketv/src/HellaCache.scala +++ b/rocketv/src/HellaCache.scala @@ -1,318 +1,1682 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{log2Ceil, log2Up, Decoupled, Valid} -import freechips.rocketchip.amba._ -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.tile._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import org.chipsalliance.cde.config.{Field, Parameters} - -import scala.collection.mutable.ListBuffer - -trait HasL1HellaCacheParameters extends HasL1CacheParameters with HasCoreParameters { - val cacheParams = tileParams.dcache.get - val cfg = cacheParams - - def wordBits = coreDataBits - def wordBytes = coreDataBytes - def subWordBits = cacheParams.subWordBits.getOrElse(wordBits) - def subWordBytes = subWordBits / 8 - def wordOffBits = log2Up(wordBytes) - def beatBytes = cacheBlockBytes / cacheDataBeats - def beatWords = beatBytes / wordBytes - def beatOffBits = log2Up(beatBytes) - def idxMSB = untagBits - 1 - def idxLSB = blockOffBits - def offsetmsb = idxLSB - 1 - def offsetlsb = wordOffBits - def rowWords = rowBits / wordBits - def doNarrowRead = coreDataBits * nWays % rowBits == 0 - def eccBytes = cacheParams.dataECCBytes - val eccBits = cacheParams.dataECCBytes * 8 - val encBits = cacheParams.dataCode.width(eccBits) - val encWordBits = encBits * (wordBits / eccBits) - def encDataBits = cacheParams.dataCode.width(coreDataBits) // NBDCache only - def encRowBits = encDataBits * rowWords - def lrscCycles = coreParams.lrscCycles // ISA 
requires 16-insn LRSC sequences to succeed - def lrscBackoff = 3 // disallow LRSC reacquisition briefly - def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant - def nIOMSHRs = cacheParams.nMMIOs - def maxUncachedInFlight = cacheParams.nMMIOs - def dataScratchpadSize = cacheParams.dataScratchpadBytes - - require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)") - if (!usingDataScratchpad) - require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)") - // would need offset addr for puts if data width < xlen - require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)") +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter, SourceInfo} +import chisel3.util.experimental.{BitSet, InlineInstance} +import chisel3.util.{Arbiter, BitPat, Cat, Enum, Fill, FillInterleaved, Mux1H, MuxLookup, OHToUInt, PriorityEncoder, PriorityEncoderOH, PriorityMux, Queue, RegEnable, SRAM, SRAMInterface, UIntToOH, isPow2, log2Ceil} +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ChiselBundle, AXI4ROIrrevocable, AXI4RWIrrevocable, R, W} + +object HellaCacheParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[HellaCacheParameter] = upickle.default.macroRW[HellaCacheParameter] } -abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module with HasL1HellaCacheParameters +case class HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + nWays: Int, + nSets: Int, + rowBits: Int, + nTLBSets: Int, 
+ nTLBWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) extends SerializableModuleParameter { -abstract class L1HellaCacheBundle(implicit val p: Parameters) - extends ParameterizedBundle()(p) - with HasL1HellaCacheParameters + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) -/** Bundle definitions for HellaCache interfaces */ + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits -trait HasCoreMemOp extends HasL1HellaCacheParameters { - val addr = UInt(coreMaxAddrBits.W) - val idx = (usingVM && untagBits > pgIdxBits).option(UInt(coreMaxAddrBits.W)) - val tag = UInt((coreParams.dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) - val cmd = UInt(M_SZ.W) - val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W) - val signed = Bool() - val dprv = UInt(PRV.SZ.W) - val dv = Bool() -} + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits -trait HasCoreData extends HasCoreParameters { - val data = UInt(coreDataBits.W) - val mask = UInt(coreDataBytes.W) -} + def maxHypervisorExtraAddrBits: Int = 2 -class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp { - val phys = Bool() - val no_alloc = Bool() - val no_xcpt = Bool() -} + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 -class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits -class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp with HasCoreData { - val replay = Bool() - val has_data = Bool() - val data_word_bypass = UInt(coreDataBits.W) - val data_raw = UInt(coreDataBits.W) - val store_data = 
UInt(coreDataBits.W) -} + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + // static for now + def dcacheReqTagBits: Int = 6 -class AlignmentExceptions extends Bundle { - val ld = Bool() - val st = Bool() -} + def usingHypervisor = false -class HellaCacheExceptions extends Bundle { - val ma = new AlignmentExceptions - val pf = new AlignmentExceptions - val gf = new AlignmentExceptions - val ae = new AlignmentExceptions -} + def scratch: Option[BigInt] = None + + def acquireBeforeRelease: Boolean = false + + def replacementPolicy: String = "random" //lfsr + + def usingAtomics: Boolean = true + + def useAtomicsOnlyForIO: Boolean = false + + def flushOnFenceI: Boolean = true + + def useVector: Boolean = false + + def haveCFlush: Boolean = false + + def subWordBits: Option[Int] = None + + // calculated + def pgIdxBits: Int = 12 + + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + + def blockOffBits: Int = lgCacheBlockBytes + + def idxBits: Int = log2Ceil(nSets) + + def untagBits: Int = blockOffBits + idxBits + + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + + def usingDataScratchpad: Boolean = scratch.isDefined + + def dcacheArbPorts: Int = 1 + (if (usingVM) 1 else 0) + (if (usingDataScratchpad) 1 else 0) + + def tagCode: Code = Code.fromString(tagECC) + + def dataCode: Code = Code.fromString(dataECC) + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def pipelineWayMux: Boolean = false + + def nPMPs: Int = 8 + + def vpnBits: Int = vaddrBits - pgIdxBits + + def hasCorrectable: Boolean = tagCode.canCorrect || dataCode.canCorrect + + def hasUncorrectable: Boolean = tagCode.canDetect || dataCode.canDetect + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + /* Sv32 */ + val 
maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 + } + + def coreDataBits: Int = xLen max fLen + + def coreDataBytes: Int = coreDataBits / 8 + + def silentDrop: Boolean = !acquireBeforeRelease + + def idxMSB: Int = untagBits - 1 + + def idxLSB: Int = blockOffBits + + def wordBits: Int = coreDataBits + + def rowWords: Int = rowBits / wordBits + + def wordBytes: Int = coreDataBytes + + def wordOffBits: Int = log2Ceil(wordBytes) + + def cacheDataBits: Int = rowBits + + def cacheDataBeats: Int = (cacheBlockBytes * 8) / cacheDataBits + + def beatBytes: Int = cacheBlockBytes / cacheDataBeats + + def beatWords: Int = beatBytes / wordBytes + + def dataECCBytes: Int = 1 -class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) with HasCoreData - -class HellaCachePerfEvents extends Bundle { - val acquire = Bool() - val release = Bool() - val grant = Bool() - val tlbMiss = Bool() - val blocked = Bool() - val canAcceptStoreThenLoad = Bool() - val canAcceptStoreThenRMW = Bool() - val canAcceptLoadThenLoad = Bool() - val storeBufferEmptyAfterLoad = Bool() - val storeBufferEmptyAfterStore = Bool() + def eccBits: Int = dataECCBytes * 8 + + def eccBytes: Int = dataECCBytes + + def encBits: Int = dataCode.width(eccBits) + + def rowBytes: Int = rowBits / 8 + + def subWordBytes: Int = subWordBits.getOrElse(wordBits) / 8 + + def rowOffBits: Int = log2Ceil(rowBytes) + + def beatOffBits: Int = log2Ceil(beatBytes) + + def usingAtomicsInCache: Boolean = usingAtomics && !useAtomicsOnlyForIO + + def pgUntagBits: Int = if (usingVM) untagBits min pgIdxBits else untagBits + + def tagBits: Int = paddrBits - pgUntagBits + + // todo: max axi id + def firstMMIO: Int = 4 + + def lrscBackoff: Int = 3 + + def lrscCycles: Int = 80 // worst case is 14 mispredicted branches + slop + + def pmaCheckerParameter: PMACheckerParameter = PMACheckerParameter( + paddrBits, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects) + + def 
tlbParameter: TLBParameter = TLBParameter( + useAsyncReset, + xLen, + nTLBSets, + nTLBWays, + nSectors = 4, + nSuperpageEntries = 4, + asidBits = 0, + pgLevels, + usingHypervisor = false, + usingAtomics, + usingDataScratchpad, + useAtomicsOnlyForIO, + usingVM, + usingAtomicsInCache, + nPMPs, + pmaCheckerParameter, + paddrBits, + isITLB = false + ) + + def amoaluParameter: Option[AMOALUParameter] = Option.when(eccBytes > 1 || usingAtomicsInCache)(AMOALUParameter(xLen)) + + def dtimParameter: Option[AXI4BundleParameter] = scratch.map { _ => + AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 0, + userDataWidth = 0, + userRespWidth = 0, + hasAW = true, + hasW = true, + hasB = true, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, + ) + } + + def loadStoreParameter: AXI4BundleParameter = AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 1, + userDataWidth = 0, + userRespWidth = 1, + hasAW = true, + hasW = true, + hasB = true, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, + ) } -// interface between D$ and processor/DTLB -class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { - val req = Decoupled(new HellaCacheReq) - val s1_kill = Output(Bool()) // kill previous cycle's req - val s1_data = Output(new HellaCacheWriteData()) // data for previous cycle's req - val s2_nack = Input(Bool()) // req from two cycles ago is rejected - val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW 
hazard (performance hint) - val s2_kill = Output(Bool()) // kill req from two cycles ago - val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO - val s2_paddr = Input(UInt(paddrBits.W)) // translated address - - val resp = Flipped(Valid(new HellaCacheResp)) - val replay_next = Input(Bool()) - val s2_xcpt = Input(new HellaCacheExceptions) - val s2_gpa = Input(UInt(vaddrBitsExtended.W)) - val s2_gpa_is_pte = Input(Bool()) - val uncached_resp = tileParams.dcache.get.separateUncachedResp.option(Flipped(Decoupled(new HellaCacheResp))) - val ordered = Input(Bool()) - val perf = Input(new HellaCachePerfEvents()) - - val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? - val clock_enabled = Input(Bool()) // is D$ currently being clocked? +class HellaCacheInterface(parameter: HellaCacheParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val cpu = Flipped( + new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) + ) + val ptw = new TLBPTWIO( + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.vaddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits + ) + val errors = new DCacheErrors(parameter.hasCorrectable, parameter.hasUncorrectable, parameter.paddrBits) + val loadStoreAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) } -/** Base classes for Diplomatic TL2 HellaCaches */ - -abstract class HellaCache()(implicit p: Parameters) - extends LazyModule - with 
HasNonDiplomaticTileParameters { - protected val cfg = tileParams.dcache.get - - protected def cacheClientParameters = cfg.scratch - .map(x => Seq()) - .getOrElse( - Seq( - TLMasterParameters.v1( - name = s"Core DCache", - sourceId = IdRange(0, 1.max(cfg.nMSHRs)), - supportsProbe = TransferSizes(cfg.blockBytes, cfg.blockBytes) +@instantiable +class HellaCache(val parameter: HellaCacheParameter) + extends FixedIORawModule(new HellaCacheInterface(parameter)) + with SerializableModule[HellaCacheParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + // instantiate sub hierarchies + val tlb: Instance[TLB] = Instantiate(new TLB(parameter.tlbParameter)) + val pmaChecker: Instance[PMAChecker] = Instantiate(new PMAChecker(parameter.pmaCheckerParameter)) + val amoalus: Option[Seq[Instance[AMOALU]]] = parameter.amoaluParameter.map(amoaluParameter=>Seq.tabulate(parameter.coreDataBits / parameter.xLen)(i => Instantiate(new AMOALU(amoaluParameter)))) + + tlb.io.clock := io.clock + tlb.io.reset := io.reset + + // compatibility layers + object cacheParams { + def tagCode: Code = parameter.tagCode + def dataCode: Code = parameter.dataCode + def silentDrop: Boolean = parameter.silentDrop + def acquireBeforeRelease: Boolean = parameter.acquireBeforeRelease + def clockGate: Boolean = parameter.clockGate + def replacementPolicy: String = parameter.replacementPolicy + def separateUncachedResp: Boolean = parameter.separateUncachedResp + def pipelineWayMux: Boolean = parameter.pipelineWayMux + } + def rowWords: Int = parameter.rowWords + def wordOffBits: Int = parameter.wordOffBits + def beatWords: Int = parameter.beatWords + def beatBytes: Int = parameter.beatBytes + def idxMSB: Int = parameter.idxMSB + def idxLSB: Int = parameter.idxLSB + def subWordBits: Int = parameter.subWordBits.getOrElse(parameter.wordBits) + def eccBits: Int = parameter.eccBits + def eccBytes: Int = 
parameter.eccBytes + def coreMaxAddrBits: Int = parameter.coreMaxAddrBits + def usingVM: Boolean = parameter.usingVM + def pgIdxBits: Int = parameter.pgIdxBits + def pgLevelBits: Int = parameter.pgLevelBits + def dcacheReqTagBits: Int = parameter.dcacheReqTagBits + def dcacheArbPorts: Int = parameter.dcacheArbPorts + def coreDataBytes: Int = parameter.coreDataBytes + def encBits: Int = parameter.encBits + def untagBits: Int = parameter.untagBits + def rowBytes: Int = parameter.rowBytes + def subWordBytes: Int = parameter.subWordBytes + def rowOffBits: Int = parameter.rowOffBits + def beatOffBits: Int = parameter.beatOffBits + def wordBytes: Int = parameter.wordBytes + def usingAtomicsInCache: Boolean = parameter.usingAtomicsInCache + def nWays: Int = parameter.nWays + def nSets: Int = parameter.nSets + def cacheBlockBytes: Int = parameter.cacheBlockBytes + def vaddrBitsExtended: Int = parameter.vaddrBitsExtended + def paddrBits: Int = parameter.paddrBits + def maxUncachedInFlight: Int = parameter.maxUncachedInFlight + def tagBits: Int = parameter.tagBits + def idxBits: Int = parameter.idxBits + def blockOffBits: Int = parameter.blockOffBits + def usingDataScratchpad: Boolean = parameter.usingDataScratchpad + def usingAtomics: Boolean = parameter.usingAtomics + def lrscBackoff: Int = parameter.lrscBackoff + def lrscCycles: Int = parameter.lrscCycles + def rowBits: Int = parameter.rowBits + def cacheDataBits: Int = parameter.rowBits + def cacheDataBytes: Int = cacheDataBits / 8 + def cacheDataBeats: Int = (cacheBlockBytes * 8) / cacheDataBits + def refillCycles: Int = cacheDataBeats + def blockProbeAfterGrantCycles: Int = 8 + def wordBits: Int = parameter.coreDataBits + object outer { + def firstMMIO = parameter.firstMMIO + def flushOnFenceI = parameter.flushOnFenceI + } + object coreParams { + def useVector = parameter.useVector + def haveCFlush = parameter.haveCFlush + } + object ClientMetadata { + def isValid(cm: ClientMetadata): Bool = cm.state > 0.U + + def 
apply(perm: UInt): ClientMetadata = { + val meta = Wire(new ClientMetadata) + meta.state := perm + meta + } + } + object L1Metadata { + def apply(tag: Bits, coh: ClientMetadata) = { + val meta = Wire(new L1Metadata(parameter.tagBits)) + meta.tag := tag + meta.coh := coh + meta + } + } + def M_SFENCE = "b10100".U // SFENCE.VMA + def M_HFENCEV = "b10101".U // HFENCE.VVMA + def M_HFENCEG = "b10110".U // HFENCE.GVMA + def M_FLUSH_ALL = "b00101".U // flush all lines + def M_WOK = "b10111".U // check write permissions but don't perform a write + def M_PWR = "b10001".U // partial (masked) store + def M_XLR = "b00110".U + def M_XSC = "b00111".U + def M_XWR = "b00001".U; // int store + def M_XRD = "b00000".U; // int load + def M_PFW = "b00011".U; // prefetch with intent to write + + // todo + def grouped(x: UInt, width: Int): Seq[UInt] = + (0 until x.getWidth by width).map(base => x(base + width - 1, base)) + def grouped[T <: Data](x: Vec[T], width: Int): Seq[Vec[T]] = + (0 until x.size by width).map(base => + VecInit(Seq.tabulate(width){i => x(base + i)}) + ) + + val clock = io.clock + val reset = io.reset + val pma_checker = pmaChecker + + val tECC = cacheParams.tagCode + val dECC = cacheParams.dataCode + require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits") + require(eccBytes == 1 || !dECC.isInstanceOf[IdentityCode]) + require(cacheParams.silentDrop || cacheParams.acquireBeforeRelease, "!silentDrop requires acquireBeforeRelease") + val usingRMW = eccBytes > 1 || usingAtomicsInCache + val mmioOffset = outer.firstMMIO + // edge.manager.requireFifo(TLFIFOFixer.allVolatile) // TileLink pipelining MMIO requests + + val clock_en_reg = Reg(Bool()) + io.cpu.clock_enabled := clock_en_reg + + val gated_clock = + if (!cacheParams.clockGate) clock + else chisel3.util.circt.ClockGate(clock, clock_en_reg) + class DCacheModuleImpl { // entering gated-clock domain + // tags + val replacer = ReplacementPolicy.fromString(cacheParams.replacementPolicy, nWays) + + 
/** Metadata Arbiter: + * 0: Tag update on reset + * 1: Tag update on ECC error + * 2: Tag update on hit + * 3: Tag update on refill + * 4: Tag update on release + * 5: Tag update on flush + * 6: Tag update on probe + * 7: Tag update on CPU request + */ + + val metaArb = Module(new Arbiter(new DCacheMetadataReq(vaddrBitsExtended, idxBits, nWays, cacheParams.tagCode.width(new L1Metadata(tagBits).getWidth)), 8) with InlineInstance) + // todo: delete + metaArb.io.in(1).valid := false.B + metaArb.io.in(1).bits := DontCare + + val tag_array: SRAMInterface[Vec[UInt]] = SRAM.masked( + size = nSets, + tpe = Vec(nWays, chiselTypeOf(metaArb.io.out.bits.data)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + ) + + // data + // val data = Module(new DCacheDataArray) + // no more DCacheDataArray module for better PD experience + // Vec(nWays, req.bits.wdata) + val dataArrays = Seq.tabulate(rowBits / subWordBits) { i => SRAM.masked( + size = nSets * cacheBlockBytes / rowBytes, + tpe = Vec(nWays * (subWordBits / eccBits), UInt(encBits.W)), + numReadPorts = 0, + numWritePorts = 0, + numReadwritePorts = 1 + )} + + /** Data Arbiter + * 0: data from pending store buffer + * 1: data from TL-D refill + * 2: release to TL-A + * 3: hit path to CPU + */ + val dataArb = Module(new Arbiter(new DCacheDataReq(untagBits, encBits, rowBytes, eccBytes, subWordBytes, wordBytes, nWays), 4) with InlineInstance) + + dataArb.io.in.tail.foreach(_.bits.wdata := dataArb.io.in.head.bits.wdata) // tie off write ports by default + dataArb.io.out.ready := true.B + metaArb.io.out.ready := clock_en_reg + + val readData: Seq[Seq[UInt]] = dataArrays.zipWithIndex.map { case (array, i) => + val valid = dataArb.io.out.valid && ((dataArrays.size == 1).B || dataArb.io.out.bits.wordMask(i)) + val dataEccMask = if (eccBits == subWordBits) Seq(true.B) else dataArb.io.out.bits.eccMask.asBools + val wMask = if (nWays == 1) dataEccMask else (0 until nWays).flatMap(i => dataEccMask.map(_ && 
dataArb.io.out.bits.way_en(i))) + val wWords = grouped(dataArb.io.out.bits.wdata, encBits * (subWordBits / eccBits)) + val addr = (dataArb.io.out.bits.addr >> rowOffBits).asUInt + val wData = VecInit(grouped(wWords(i), encBits)) + val wMaskSlice: Seq[Bool] = (0 until wMask.size) + .filter(j => i % (wordBytes * 8 / subWordBits) == (j % (wordBytes / eccBytes)) / (subWordBytes / eccBytes)) + .map(wMask(_)) + array.readwritePorts.foreach {arrayPort => + arrayPort.enable := valid + arrayPort.isWrite := dataArb.io.out.bits.write + arrayPort.address := addr + arrayPort.writeData := VecInit((0 until nWays).flatMap(i => wData)) + arrayPort.mask.foreach(_ := VecInit(wMaskSlice)) + } + val data: Vec[UInt] = array.readwritePorts.head.readData + // data.grouped(subWordBits / eccBits).map(_.asUInt).toSeq + grouped(data, subWordBits / eccBits).map(_.asUInt) + } + // (io.resp.zip(rdata.transpose)).foreach { case (resp, data) => resp := data.asUInt } + val rdata = readData.transpose.map(ds => VecInit(ds).asUInt) + + val release_queue_empty =Wire(Bool()) + + val s1_valid = RegNext(io.cpu.req.fire, false.B) + val releaseAddress = RegInit(0.U(parameter.paddrBits.W)) + val s1_nack = WireDefault(false.B) + val s1_valid_masked = s1_valid && !io.cpu.s1_kill + val s1_valid_not_nacked = s1_valid && !s1_nack + val s0_clk_en = metaArb.io.out.valid && !metaArb.io.out.bits.write + + val s0_req = WireInit(io.cpu.req.bits) + s0_req.addr := Cat(metaArb.io.out.bits.addr >> blockOffBits, io.cpu.req.bits.addr(blockOffBits - 1, 0)) + s0_req.idx.foreach(_ := Cat(metaArb.io.out.bits.idx, s0_req.addr(blockOffBits - 1, 0))) + when(!metaArb.io.in(7).ready) { s0_req.phys := true.B } + val s1_req = RegEnable(s0_req, s0_clk_en) + val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB - 1, 0)) + + val s0_tlb_req: TLBReq = Wire(new TLBReq(paddrBits, vaddrBitsExtended)) + s0_tlb_req.passthrough := s0_req.phys + s0_tlb_req.vaddr := s0_req.addr + s0_tlb_req.size := s0_req.size + 
s0_tlb_req.cmd := s0_req.cmd + s0_tlb_req.prv := s0_req.dprv + s0_tlb_req.v := s0_req.dv + val s1_tlb_req = RegEnable(s0_tlb_req, s0_clk_en) + + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + val s1_sfence = s1_req.cmd === M_SFENCE || s1_req.cmd === M_HFENCEV || s1_req.cmd === M_HFENCEG + val s1_flush_line = s1_req.cmd === M_FLUSH_ALL && s1_req.size(0) + val s1_flush_valid = Reg(Bool()) + val s1_waw_hazard = Wire(Bool()) + + val s_ready :: s_voluntary_writeback :: s_voluntary_write_meta :: s_voluntary_aw :: Nil = Enum(4) + val supports_flush = outer.flushOnFenceI || coreParams.haveCFlush + val flushed = RegInit(true.B) + val flushing = RegInit(false.B) + val flushing_req = Reg(chiselTypeOf(s1_req)) + val cached_grant_wait = RegInit(false.B) + val resetting = RegInit(false.B) + val flushCounter = RegInit((nSets * (nWays - 1)).U(log2Ceil(nSets * nWays).W)) + val release_ack_wait = RegInit(false.B) + val release_ack_addr = Reg(UInt(paddrBits.W)) + val release_state = RegInit(s_ready) + val refill_way = Reg(UInt()) + val any_pstore_valid = Wire(Bool()) + def isOneOf(x: UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + val inWriteback = release_state === s_voluntary_writeback + val awState = release_state === s_voluntary_aw + val releaseWay = Wire(UInt()) + io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack + release_queue_empty := release_state =/= s_voluntary_writeback + + // I/O MSHRs + val uncachedInFlight = RegInit(VecInit(Seq.fill(maxUncachedInFlight)(false.B))) + val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes))) + val uncachedResp = WireInit(new HellaCacheReq( + coreMaxAddrBits, + usingVM, + untagBits, + pgIdxBits, + dcacheReqTagBits, + dcacheArbPorts, + coreDataBytes + ), DontCare) + + // hit initiation path + val s0_read 
= isRead(io.cpu.req.bits.cmd) + dataArb.io.in(3).valid := io.cpu.req.valid && likelyNeedsRead(io.cpu.req.bits) + dataArb.io.in(3).bits := dataArb.io.in(1).bits + dataArb.io.in(3).bits.write := false.B + dataArb.io.in(3).bits.addr := Cat( + io.cpu.req.bits.idx.getOrElse(io.cpu.req.bits.addr) >> tagLSB, + io.cpu.req.bits.addr(tagLSB - 1, 0) + ) + dataArb.io.in(3).bits.wordMask := { + val mask = (log2Ceil(subWordBytes) until rowOffBits).foldLeft(1.U) { + case (in, i) => + val upper_mask = Mux( + (i >= log2Ceil(wordBytes)).B || io.cpu.req.bits.size <= i.U, + 0.U, + ((BigInt(1) << (1 << (i - log2Ceil(subWordBytes)))) - 1).U + ) + val upper = Mux(io.cpu.req.bits.addr(i), in, 0.U) | upper_mask + val lower = Mux(io.cpu.req.bits.addr(i), 0.U, in) + upper ## lower + } + Fill(subWordBytes / eccBytes, mask) + } + dataArb.io.in(3).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + dataArb.io.in(3).bits.way_en := ~0.U(nWays.W) + when(!dataArb.io.in(3).ready && s0_read) { io.cpu.req.ready := false.B } + val s1_did_read = RegEnable(dataArb.io.in(3).ready && (io.cpu.req.valid && needsRead(io.cpu.req.bits)), s0_clk_en) + val s1_read_mask = RegEnable(dataArb.io.in(3).bits.wordMask, s0_clk_en) + metaArb.io.in(7).valid := io.cpu.req.valid + metaArb.io.in(7).bits.write := false.B + metaArb.io.in(7).bits.idx := dataArb.io.in(3).bits.addr(idxMSB, idxLSB) + metaArb.io.in(7).bits.addr := io.cpu.req.bits.addr + metaArb.io.in(7).bits.way_en := metaArb.io.in(4).bits.way_en + metaArb.io.in(7).bits.data := metaArb.io.in(4).bits.data + when(!metaArb.io.in(7).ready) { io.cpu.req.ready := false.B } + + // address translation + val s1_cmd_uses_tlb = s1_readwrite || s1_flush_line || s1_req.cmd === M_WOK + io.ptw <> tlb.io.ptw + tlb.io.kill := io.cpu.s2_kill + tlb.io.req.valid := s1_valid && !io.cpu.s1_kill && s1_cmd_uses_tlb + tlb.io.req.bits := s1_tlb_req + when(!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false.B } + when(s1_valid && s1_cmd_uses_tlb && 
tlb.io.resp.miss) { s1_nack := true.B } + + tlb.io.sfence.valid := s1_valid && !io.cpu.s1_kill && s1_sfence + tlb.io.sfence.bits.rs1 := s1_req.size(0) + tlb.io.sfence.bits.rs2 := s1_req.size(1) + tlb.io.sfence.bits.asid := io.cpu.s1_data.data + tlb.io.sfence.bits.addr := s1_req.addr + tlb.io.sfence.bits.hv := s1_req.cmd === M_HFENCEV + tlb.io.sfence.bits.hg := s1_req.cmd === M_HFENCEG + + val s1_paddr = Cat(tlb.io.resp.paddr >> pgIdxBits, s1_req.addr(pgIdxBits - 1, 0)) + + // pma_checker.io.req.bits.passthrough := true.B + // pma_checker.io.req.bits.vaddr := s1_req.addr + // pma_checker.io.req.bits.size := s1_req.size + // pma_checker.io.req.bits.cmd := s1_req.cmd + // pma_checker.io.req.bits.prv := s1_req.dprv + // pma_checker.io.req.bits.v := s1_req.dv + // todo: uncertain + pma_checker.io.paddr := s1_paddr + val s1_victim_way = Wire(UInt()) + val (s1_hit_way, s1_hit_state, s1_meta) = + if (usingDataScratchpad) { + val baseAddr: UInt = parameter.scratch.getOrElse(BigInt(0)).U + val inScratchpad = s1_paddr >= baseAddr && s1_paddr < baseAddr + (nSets * cacheBlockBytes).U + val hitState = Mux(inScratchpad, ClientMetadata(3.U), ClientMetadata(0.U)) + val dummyMeta = L1Metadata(0.U, ClientMetadata(0.U)) + (inScratchpad, hitState, Seq(tECC.encode(dummyMeta.asUInt))) + } else { + val metaReq = metaArb.io.out + val metaIdx = metaReq.bits.idx + val wmask = if (nWays == 1) Seq(true.B) else metaReq.bits.way_en.asBools + tag_array.readwritePorts.foreach { tagPort => + tagPort.enable := metaReq.valid + tagPort.isWrite := metaReq.bits.write + tagPort.address := metaIdx + tagPort.writeData := VecInit(Seq.fill(nWays)(metaReq.bits.data)) + tagPort.mask.foreach(_ := VecInit(wmask)) + } + val s1_meta: Seq[UInt] = tag_array.readwritePorts.head.readData + val s1_meta_uncorrected: Seq[L1Metadata] = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata(tagBits))) + val s1_tag: UInt = s1_paddr >> tagLSB + val s1_meta_hit_way = VecInit(s1_meta_uncorrected.map(r => 
ClientMetadata.isValid(r.coh) && r.tag === s1_tag)).asUInt + val s1_meta_hit_state = (s1_meta_uncorrected + .map(r => Mux(r.tag === s1_tag && !s1_flush_valid, r.coh.asUInt, 0.U)) + .reduce(_ | _)) + .asTypeOf(chiselTypeOf(ClientMetadata(0.U))) + (s1_meta_hit_way, s1_meta_hit_state, s1_meta) + } + val s1_data_way = WireDefault(if (nWays == 1) 1.U else Mux(inWriteback, releaseWay, s1_hit_way)) +// val tl_d_data_encoded = Wire(chiselTypeOf(encodeData(tl_out.d.bits.data, false.B))) + val tl_d_data_encoded = Wire(chiselTypeOf(encodeData(io.loadStoreAXI.r.bits.data, false.B))) +// val s1_all_data_ways = VecInit(data.io.resp ++ (!cacheParams.separateUncachedResp).option(tl_d_data_encoded)) + val s1_all_data_ways: Vec[UInt] = VecInit(rdata ++ Option.when(!cacheParams.separateUncachedResp)(tl_d_data_encoded)) + val s1_mask_xwr = new StoreGen(s1_req.size, s1_req.addr, 0.U, wordBytes).mask + val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, s1_mask_xwr) + // for partial writes, s1_data.mask must be a subset of s1_mask_xwr + assert(!(s1_valid_masked && s1_req.cmd === M_PWR) || (s1_mask_xwr | ~io.cpu.s1_data.mask).andR) + + val s2_valid = RegNext(s1_valid_masked && !s1_sfence, init = false.B) + val s2_valid_no_xcpt = s2_valid && !io.cpu.s2_xcpt.asUInt.orR + val releaseInFlight = release_state =/= s_ready + val s2_not_nacked_in_s1 = RegNext(!s1_nack) + val s2_valid_not_nacked_in_s1 = s2_valid && s2_not_nacked_in_s1 + val s2_valid_masked = s2_valid_no_xcpt && s2_not_nacked_in_s1 + val s2_valid_not_killed = s2_valid_masked && !io.cpu.s2_kill + val s2_req = Reg(chiselTypeOf(io.cpu.req.bits)) + val s2_cmd_flush_all = s2_req.cmd === M_FLUSH_ALL && !s2_req.size(0) + val s2_cmd_flush_line = s2_req.cmd === M_FLUSH_ALL && s2_req.size(0) + val s2_tlb_xcpt = Reg(chiselTypeOf(tlb.io.resp)) + val s2_pma = Reg(chiselTypeOf(tlb.io.resp)) + val s2_uncached_resp_addr = Reg(chiselTypeOf(s2_req.addr)) // should be DCE'd in synthesis + when(s1_valid_not_nacked || s1_flush_valid) { + 
s2_req := s1_req + s2_req.addr := s1_paddr + s2_tlb_xcpt := tlb.io.resp + s2_pma := tlb.io.resp + } + val s2_vaddr = Cat(RegEnable(s1_vaddr, s1_valid_not_nacked || s1_flush_valid) >> tagLSB, s2_req.addr(tagLSB - 1, 0)) + val s2_read = isRead(s2_req.cmd) + val s2_write = isWrite(s2_req.cmd) + val s2_readwrite = s2_read || s2_write + val s2_flush_valid_pre_tag_ecc = RegNext(s1_flush_valid) + val s1_meta_decoded = s1_meta.map(tECC.decode(_)) + val s1_meta_clk_en = s1_valid_not_nacked || s1_flush_valid + val s2_meta_correctable_errors = VecInit(s1_meta_decoded.map(m => RegEnable(m.correctable, s1_meta_clk_en))).asUInt + val s2_meta_uncorrectable_errors = VecInit(s1_meta_decoded.map(m => RegEnable(m.uncorrectable, s1_meta_clk_en))).asUInt + val s2_meta_error_uncorrectable = s2_meta_uncorrectable_errors.orR + val s2_meta_corrected = s1_meta_decoded.map(m => RegEnable(m.corrected, s1_meta_clk_en).asTypeOf(new L1Metadata(tagBits))) + val s2_meta_error = (s2_meta_uncorrectable_errors | s2_meta_correctable_errors).orR + val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error + val s2_data = { + val wordsPerRow = rowBits / subWordBits + val en = s1_valid || inWriteback || io.cpu.replay_next + val word_en = Mux(inWriteback, Fill(wordsPerRow, 1.U), Mux(s1_did_read, s1_read_mask, 0.U)) + val s1_way_words = s1_all_data_ways.map(grouped(_, dECC.width(eccBits) * (subWordBits / eccBits))) + if (cacheParams.pipelineWayMux) { + val s1_word_en = Mux(io.cpu.replay_next, 0.U, word_en) + VecInit(for (i <- 0 until wordsPerRow) yield { + val s2_way_en = RegEnable(Mux(s1_word_en(i), s1_data_way, 0.U), en) + val s2_way_words = (0 until nWays).map(j => RegEnable(s1_way_words(j)(i), en && word_en(i))) + (0 until nWays).map(j => Mux(s2_way_en(j), s2_way_words(j), 0.U)).reduce(_ | _) + }).asUInt + } else { + val s1_word_en = Mux( + !io.cpu.replay_next, + word_en, + UIntToOH(if (log2Ceil(rowBits / 8) == log2Ceil(wordBytes)) 0.U else uncachedResp.addr(log2Ceil(rowBits / 8) - 1, 
log2Ceil(wordBytes)), wordsPerRow) ) - ) + VecInit(for (i <- 0 until wordsPerRow) yield { + RegEnable(Mux1H(Mux(s1_word_en(i), s1_data_way, 0.U), s1_way_words.map(_(i))), en) + }).asUInt + } + } + val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) + val s2_hit_state: ClientMetadata = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_flush_valid) + val s2_waw_hazard = RegEnable(s1_waw_hazard, s1_valid_not_nacked) + val s2_store_merge = Wire(Bool()) +// val s2_hit_valid = s2_hit_state.isValid() + val s2_hit_valid = s2_hit_state.state > 0.U + // No prob, so only D T N + // val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) + val s2_hit = s2_hit_valid + val nexState = Mux(s2_hit_state.state === 3.U || isWrite(s2_req.cmd), 3.U, 2.U) + val s2_new_hit_state: ClientMetadata = ClientMetadata(nexState) + val s2_data_decoded = decodeData(s2_data) + val s2_data_error = VecInit(s2_data_decoded.map(_.error)).asUInt.orR + val s2_data_error_uncorrectable = VecInit(s2_data_decoded.map(_.uncorrectable)).asUInt.orR + val s2_data_corrected = VecInit(s2_data_decoded.map(_.corrected): Seq[UInt]).asUInt + val s2_data_uncorrected = VecInit(s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt + val s2_valid_hit_maybe_flush_pre_data_ecc_and_waw = s2_valid_masked && !s2_meta_error && s2_hit + val s2_no_alloc_hazard = + if (!usingVM || pgIdxBits >= untagBits) false.B + else { + // make sure that any in-flight non-allocating accesses are ordered before + // any allocating accesses. this can only happen if aliasing is possible. 
+ val any_no_alloc_in_flight = Reg(Bool()) + when(!uncachedInFlight.asUInt.orR) { any_no_alloc_in_flight := false.B } + when(s2_valid && s2_req.no_alloc) { any_no_alloc_in_flight := true.B } + val s1_need_check = any_no_alloc_in_flight || s2_valid && s2_req.no_alloc + + val concerns = (uncachedInFlight.zip(uncachedReqs)) :+ (s2_valid && s2_req.no_alloc, s2_req) + val s1_uncached_hits = VecInit(concerns.map { c => + val concern_wmask = new StoreGen(c._2.size, c._2.addr, 0.U, wordBytes).mask + val addr_match = (c._2.addr ^ s1_paddr)(pgIdxBits + pgLevelBits - 1, log2Ceil(wordBytes)) === 0.U + val mask_match = (concern_wmask & s1_mask_xwr).orR || c._2.cmd === M_PWR || s1_req.cmd === M_PWR + val cmd_match = isWrite(c._2.cmd) || isWrite(s1_req.cmd) + c._1 && s1_need_check && cmd_match && addr_match && mask_match + }) + + val s2_uncached_hits = RegEnable(s1_uncached_hits.asUInt, s1_valid_not_nacked) + s2_uncached_hits.orR + } + val s2_valid_hit_pre_data_ecc_and_waw = + s2_valid_hit_maybe_flush_pre_data_ecc_and_waw && s2_readwrite && !s2_no_alloc_hazard + val s2_valid_flush_line = s2_valid_hit_maybe_flush_pre_data_ecc_and_waw && s2_cmd_flush_line + val s2_valid_hit_pre_data_ecc = s2_valid_hit_pre_data_ecc_and_waw && (!s2_waw_hazard || s2_store_merge) + val s2_valid_data_error = s2_valid_hit_pre_data_ecc_and_waw && s2_data_error + val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error + val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit + val s2_uncached = !s2_pma.cacheable || s2_req.no_alloc && !s2_pma.must_alloc && !s2_hit_valid + val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR + dontTouch(s2_valid_cached_miss) + val s2_want_victimize = + (!usingDataScratchpad).B && (s2_valid_cached_miss || s2_valid_flush_line || s2_valid_data_error || s2_flush_valid) + val s2_cannot_victimize = !s2_flush_valid && io.cpu.s2_kill + val s2_victimize = s2_want_victimize && !s2_cannot_victimize + val 
s2_valid_uncached_pending = s2_valid_miss && s2_uncached && !uncachedInFlight.asUInt.andR + val s2_victim_way = UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)) + val s2_victim_or_hit_way = Mux(s2_hit_valid, s2_hit_way, s2_victim_way) + val s2_victim_tag = Mux( + s2_valid_data_error || s2_valid_flush_line, + s2_req.addr(paddrBits - 1, tagLSB), + Mux1H(s2_victim_way, s2_meta_corrected).tag ) + val s2_victim_state: ClientMetadata = Mux(s2_hit_valid, s2_hit_state, Mux1H(s2_victim_way, s2_meta_corrected).coh) + + val s2_victim_dirty = s2_victim_state.state === 3.U + dontTouch(s2_victim_dirty) + val s2_update_meta = s2_hit_state.state =/= s2_new_hit_state.state + val s2_dont_nack_uncached = s2_valid_uncached_pending && io.loadStoreAXI.aw.ready + val s2_dont_nack_misc = s2_valid_masked && !s2_meta_error && + (supports_flush.B && s2_cmd_flush_all && flushed && !flushing || + supports_flush.B && s2_cmd_flush_line && !s2_hit || + s2_req.cmd === M_WOK) + io.cpu.s2_nack := s2_valid_no_xcpt && !s2_dont_nack_uncached && !s2_dont_nack_misc && !s2_valid_hit + when(io.cpu.s2_nack || (s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta)) { s1_nack := true.B } - protected def mmioClientParameters = Seq( - TLMasterParameters.v1( - name = s"Core DCache MMIO", - sourceId = IdRange(firstMMIO, firstMMIO + cfg.nMMIOs), - requestFifo = true + // tag updates on ECC errors + val s2_first_meta_corrected = PriorityMux(s2_meta_correctable_errors, s2_meta_corrected) + metaArb.io.in(1).valid := s2_meta_error && (s2_valid_masked || s2_flush_valid_pre_tag_ecc) + metaArb.io.in(1).bits.write := true.B + metaArb.io.in(1).bits.way_en := s2_meta_uncorrectable_errors | Mux( + s2_meta_error_uncorrectable, + 0.U, + PriorityEncoderOH(s2_meta_correctable_errors) ) - ) - def firstMMIO = (cacheClientParameters.map(_.sourceId.end) :+ 0).max + // tag updates on hit + metaArb.io.in(2).valid := s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta + metaArb.io.in(2).bits.write := 
!io.cpu.s2_kill + metaArb.io.in(2).bits.way_en := s2_victim_or_hit_way + metaArb.io.in(2).bits.idx := s2_vaddr(idxMSB, idxLSB) + metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(2).bits.data := tECC.encode(L1Metadata(s2_req.addr >> tagLSB, s2_new_hit_state).asUInt) + + // load reservations and TL error reporting + val s2_lr = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XLR + val s2_sc = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XSC + val lrscCount = RegInit(0.U) + val lrscValid = lrscCount > lrscBackoff.U + val lrscBackingOff = lrscCount > 0.U && !lrscValid + val lrscAddr = Reg(UInt()) + val lrscAddrMatch = lrscAddr === (s2_req.addr >> blockOffBits) + val s2_sc_fail = s2_sc && !(lrscValid && lrscAddrMatch) + when((s2_valid_hit && s2_lr && !cached_grant_wait || s2_valid_cached_miss) && !io.cpu.s2_kill) { + lrscCount := Mux(s2_hit, (lrscCycles - 1).U, 0.U) + lrscAddr := s2_req.addr >> blockOffBits + } + when(lrscCount > 0.U) { lrscCount := lrscCount - 1.U } + when(s2_valid_not_killed && lrscValid) { lrscCount := lrscBackoff.U } - val node = TLClientNode( - Seq( - TLMasterPortParameters.v1( - clients = cacheClientParameters ++ mmioClientParameters, - minLatency = 1, - requestFields = tileParams.core.useVM.option(Seq()).getOrElse(Seq(AMBAProtField())) + // don't perform data correction if it might clobber a recent store + val s2_correct = + s2_data_error && !any_pstore_valid && !RegNext(any_pstore_valid || s2_valid) && usingDataScratchpad.B + // pending store buffer + val s2_valid_correct = s2_valid_hit_pre_data_ecc_and_waw && s2_correct && !io.cpu.s2_kill + def s2_store_valid_pre_kill = s2_valid_hit && s2_write && !s2_sc_fail + def s2_store_valid = s2_store_valid_pre_kill && !io.cpu.s2_kill + val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) + val pstore1_addr = RegEnable(s1_vaddr, s1_valid_not_nacked && s1_write) + val pstore1_data = 
RegEnable(io.cpu.s1_data.data, s1_valid_not_nacked && s1_write) + val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) + val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write) + val pstore1_storegen_data = WireDefault(pstore1_data) + val pstore1_rmw = usingRMW.B && RegEnable(needsRead(s1_req), s1_valid_not_nacked && s1_write) + val pstore1_merge_likely = s2_valid_not_nacked_in_s1 && s2_write && s2_store_merge + val pstore1_merge = s2_store_valid && s2_store_merge + val pstore2_valid = RegInit(false.B) + val pstore_drain_opportunistic = + !(io.cpu.req.valid && likelyNeedsRead(io.cpu.req.bits)) && !(s1_valid && s1_waw_hazard) + val pstore_drain_on_miss = releaseInFlight || RegNext(io.cpu.s2_nack) + val pstore1_held = RegInit(false.B) + val pstore1_valid_likely = s2_valid && s2_write || pstore1_held + def pstore1_valid_not_rmw(s2_kill: Bool) = s2_valid_hit_pre_data_ecc && s2_write && !s2_kill || pstore1_held + val pstore1_valid = s2_store_valid || pstore1_held + any_pstore_valid := pstore1_held || pstore2_valid + val pstore_drain_structural = pstore1_valid_likely && pstore2_valid && ((s1_valid && s1_write) || pstore1_rmw) + assert(pstore1_rmw || pstore1_valid_not_rmw(io.cpu.s2_kill) === pstore1_valid) + ccover(pstore_drain_structural, "STORE_STRUCTURAL_HAZARD", "D$ read-modify-write structural hazard") + ccover(pstore1_valid && pstore_drain_on_miss, "STORE_DRAIN_ON_MISS", "D$ store buffer drain on miss") + ccover(s1_valid_not_nacked && s1_waw_hazard, "WAW_HAZARD", "D$ write-after-write hazard") + def should_pstore_drain(truly: Bool) = { + val s2_kill = truly && io.cpu.s2_kill + !pstore1_merge_likely && + (usingRMW.B && pstore_drain_structural || + (((pstore1_valid_not_rmw( + s2_kill + ) && !pstore1_rmw) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))) + } + val pstore_drain = should_pstore_drain(true.B) + pstore1_held := (s2_store_valid && !s2_store_merge || pstore1_held) && pstore2_valid && !pstore_drain 
+ val advance_pstore1 = (pstore1_valid || s2_valid_correct) && (pstore2_valid === pstore_drain) + pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 + val pstore2_addr = RegEnable(Mux(s2_correct, s2_vaddr, pstore1_addr), advance_pstore1) + val pstore2_way = RegEnable(Mux(s2_correct, s2_hit_way, pstore1_way), advance_pstore1) + val pstore2_storegen_data = VecInit({ + for (i <- 0 until wordBytes) + yield RegEnable( + pstore1_storegen_data(8 * (i + 1) - 1, 8 * i), + advance_pstore1 || pstore1_merge && pstore1_mask(i) + ) + }).asUInt + val pstore2_storegen_mask = { + val mask = Reg(UInt(wordBytes.W)) + when(advance_pstore1 || pstore1_merge) { + val mergedMask = pstore1_mask | Mux(pstore1_merge, mask, 0.U) + mask := ~Mux(s2_correct, 0.U, ~mergedMask) + } + mask + } + s2_store_merge := (if (eccBytes == 1) false.B + else { + ccover(pstore1_merge, "STORE_MERGED", "D$ store merged") + // only merge stores to ECC granules that are already stored-to, to avoid + // WAW hazards + val wordMatch = (eccMask(pstore2_storegen_mask) | ~eccMask(pstore1_mask)).andR + val idxMatch = s2_vaddr(untagBits - 1, log2Ceil(wordBytes)) === pstore2_addr( + untagBits - 1, + log2Ceil(wordBytes) ) + val tagMatch = (s2_hit_way & pstore2_way).orR + pstore2_valid && wordMatch && idxMatch && tagMatch + }) + dataArb.io.in(0).valid := should_pstore_drain(false.B) + dataArb.io.in(0).bits.write := pstore_drain + dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) + dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) + dataArb.io.in(0).bits.wdata := encodeData( + Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_data)), + false.B ) - ) + dataArb.io.in(0).bits.wordMask := { + // val eccMask = dataArb.io.in(0).bits.eccMask.asBools.grouped(subWordBytes / eccBytes).map(_.orR).toSeq.asUInt + val eccMask = VecInit(grouped(VecInit(dataArb.io.in(0).bits.eccMask.asBools), subWordBytes / eccBytes).map(_.asUInt.orR)).asUInt + val wordMask 
= UIntToOH( + if (rowOffBits == log2Ceil(wordBytes)) 0.U + else Mux(pstore2_valid, pstore2_addr, pstore1_addr)(rowOffBits - 1, log2Ceil(wordBytes)) + ) + FillInterleaved(wordBytes / subWordBytes, wordMask) & Fill(rowBytes / wordBytes, eccMask) + } + dataArb.io.in(0).bits.eccMask := eccMask(Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask)) - val hartIdSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) - val mmioAddressPrefixSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) + // store->load RAW hazard detection + def s1Depends(addr: UInt, mask: UInt) = + addr(idxMSB, wordOffBits) === s1_vaddr(idxMSB, wordOffBits) && + Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask_xwr)).orR, (mask & s1_mask_xwr).orR) + val s1_hazard = + (pstore1_valid_likely && s1Depends(pstore1_addr, pstore1_mask)) || + (pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask)) + val s1_raw_hazard = s1_read && s1_hazard + s1_waw_hazard := (if (eccBytes == 1) false.B + else { + ccover(s1_valid_not_nacked && s1_waw_hazard, "WAW_HAZARD", "D$ write-after-write hazard") + s1_write && (s1_hazard || needsRead(s1_req) && !s1_did_read) + }) + when(s1_valid && s1_raw_hazard) { s1_nack := true.B } - val module: HellaCacheModule + // performance hints to processor + io.cpu.s2_nack_cause_raw := RegNext(s1_raw_hazard) || !(!s2_waw_hazard || s2_store_merge) - def flushOnFenceI = cfg.scratch.isEmpty && !node.edges - .out(0) - .manager - .managers - .forall(m => - !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED || m.regionType <= RegionType.IDEMPOTENT + // Prepare a TileLink request message that initiates a transaction + val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR + val acquire_address = (s2_req.addr >> idxLSB) << idxLSB + val access_address = s2_req.addr + val a_size = s2_req.size + val a_data = Fill(beatWords, pstore1_data) + val a_mask = pstore1_mask << ((if (log2Ceil(beatBytes) == 
log2Ceil(wordBytes)) 0.U else access_address(log2Ceil(beatBytes) - 1, log2Ceil(wordBytes))) << 3) + val memAccessValid = !io.cpu.s2_kill && + (s2_valid_uncached_pending || + (s2_valid_cached_miss && + !(release_ack_wait && (s2_req.addr ^ release_ack_addr)( + ((pgIdxBits + pgLevelBits).min(paddrBits)) - 1, + idxLSB + ) === 0.U) && + (cacheParams.acquireBeforeRelease.B && !release_ack_wait && release_queue_empty || !s2_victim_dirty))) + // !s2_uncached -> read cache line + val accessWillRead: Bool = !s2_uncached || !s2_write + // If no managers support atomics, assert fail if processor asks for them + assert(!(memAccessValid && s2_read && s2_write && s2_uncached)) + io.loadStoreAXI.ar.valid := memAccessValid && accessWillRead + io.loadStoreAXI.ar.bits := DontCare + io.loadStoreAXI.ar.bits.burst := 1.U + io.loadStoreAXI.ar.bits.addr := Mux( + s2_uncached, + access_address, + access_address >> parameter.lgCacheBlockBytes << parameter.lgCacheBlockBytes + ) + io.loadStoreAXI.ar.bits.len := Mux( + s2_uncached, + 0.U, + (parameter.cacheBlockBytes * 8 / parameter.loadStoreParameter.dataWidth - 1).U ) + io.loadStoreAXI.ar.bits.size := Mux(s2_uncached, a_size, parameter.lgCacheBlockBytes.U) + io.loadStoreAXI.ar.bits.id := a_source + io.loadStoreAXI.ar.bits.user := s2_uncached - def canSupportCFlushLine = !usingVM || cfg.blockBytes * cfg.nSets <= (1 << pgIdxBits) + io.loadStoreAXI.aw.valid := memAccessValid && !accessWillRead + io.loadStoreAXI.aw.bits := DontCare + io.loadStoreAXI.aw.bits.burst := 1.U + io.loadStoreAXI.aw.bits.addr := access_address + io.loadStoreAXI.aw.bits.len := 0.U + io.loadStoreAXI.aw.bits.size := a_size - require(!tileParams.core.haveCFlush || cfg.scratch.isEmpty, "CFLUSH_D_L1 instruction requires a D$") -} + val dataQueue: Queue[W] = Module(new Queue(chiselTypeOf(io.loadStoreAXI.w.bits), cacheDataBeats)) + dataQueue.io.enq.valid := memAccessValid && !accessWillRead + dataQueue.io.enq.bits.data := a_data + dataQueue.io.enq.bits.strb := a_mask + 
dataQueue.io.enq.bits.last := true.B + dataQueue.io.enq.bits.user := true.B // always uc + io.loadStoreAXI.w <> dataQueue.io.deq -class HellaCacheBundle(val outer: HellaCache)(implicit p: Parameters) extends CoreBundle()(p) { - val cpu = Flipped((new HellaCacheIO)) - val ptw = new TLBPTWIO() - val errors = new DCacheErrors -} +// // Drive APROT Bits +// tl_out_a.bits.user.lift(AMBAProt).foreach { x => +// val user_bit_cacheable = s2_pma.cacheable +// +// x.privileged := s2_req.dprv === PRV.M.U || user_bit_cacheable +// // if the address is cacheable, enable outer caches +// x.bufferable := user_bit_cacheable +// x.modifiable := user_bit_cacheable +// x.readalloc := user_bit_cacheable +// x.writealloc := user_bit_cacheable +// +// // Following are always tied off +// x.fetch := false.B +// x.secure := true.B +// } -class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer) with HasL1HellaCacheParameters { - implicit val edge = outer.node.edges.out(0) - val (tl_out, _) = outer.node.out(0) - val io = IO(new HellaCacheBundle(outer)) - val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle) - val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle) - dontTouch(io.cpu.resp) // Users like to monitor these fields even if the core ignores some signals - dontTouch(io.cpu.s1_data) - - require(rowBits == edge.bundle.dataBits) - - private val fifoManagers = edge.manager.managers.filter(TLFIFOFixer.allVolatile) - fifoManagers.foreach { m => - require( - m.fifoId == fifoManagers.head.fifoId, - s"IOMSHRs must be FIFO for all regions with effects, but HellaCache sees\n" + - s"${m.nodePath.map(_.name)}\nversus\n${fifoManagers.head.nodePath.map(_.name)}" + // Set pending bits for outstanding TileLink transaction + val a_sel = UIntToOH(a_source, maxUncachedInFlight + mmioOffset) >> mmioOffset + when(io.loadStoreAXI.ar.fire || io.loadStoreAXI.aw.fire) { + when(s2_uncached) { + (a_sel.asBools.zip(uncachedInFlight.zip(uncachedReqs))).foreach { + case (s, 
(f, r)) => + when(s) { + f := true.B + r := s2_req + r.cmd := Mux(s2_write, Mux(s2_req.cmd === M_PWR, M_PWR, M_XWR), M_XRD) + } + } + }.otherwise { + cached_grant_wait := true.B + refill_way := s2_victim_or_hit_way + } + } + + def axiHelper(x: AXI4ChiselBundle, fire: Bool): (Bool, Bool, Bool, UInt) = { + // same as len + val count = RegInit(0.U(8.W)) + val first = count === 0.U + val last: Bool = x match { + case r: R => r.last + case w: W => w.last + case _ => true.B + } + val done = last && fire + when(fire) { + count := Mux(last, 0.U, count + 1.U) + } + (first, last, done, count) + } + + // grant + val (d_first, d_last, d_done, d_refill_count) = axiHelper(io.loadStoreAXI.r.bits, io.loadStoreAXI.r.fire) +// val (d_opc, grantIsUncached, grantIsUncachedData) = { +// val uncachedGrantOpcodesSansData = Seq(AccessAck, HintAck) +// val uncachedGrantOpcodesWithData = Seq(AccessAckData) +// val uncachedGrantOpcodes = uncachedGrantOpcodesWithData ++ uncachedGrantOpcodesSansData +// val whole_opc = tl_out.d.bits.opcode +// if (usingDataScratchpad) { +// assert(!tl_out.d.valid || whole_opc.isOneOf(uncachedGrantOpcodes)) +// // the only valid TL-D messages are uncached, so we can do some pruning +// val opc = whole_opc(uncachedGrantOpcodes.map(_.getWidth).max - 1, 0) +// val data = DecodeLogic(opc, uncachedGrantOpcodesWithData, uncachedGrantOpcodesSansData) +// (opc, true.B, data) +// } else { +// (whole_opc, whole_opc.isOneOf(uncachedGrantOpcodes), whole_opc.isOneOf(uncachedGrantOpcodesWithData)) +// } +// } + tl_d_data_encoded := encodeData( + io.loadStoreAXI.r.bits.data, + // tl_out.d.bits.corrupt && !io.ptw.customCSRs.suppressCorruptOnGrantData && !grantIsUncached + false.B ) - } -} + val grantIsUncachedData = io.loadStoreAXI.r.bits.user(0) + val grantIsCached = !io.loadStoreAXI.r.bits.user(0) + val grantIsRefill = grantIsCached // Writes the data array + val grantInProgress = RegInit(false.B) + val blockProbeAfterGrantCount = RegInit(0.U) + 
when(blockProbeAfterGrantCount > 0.U) { blockProbeAfterGrantCount := blockProbeAfterGrantCount - 1.U } + // !release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta, s_voluntary_aw) + val canAcceptCachedGrant = !Seq(s_voluntary_writeback, s_voluntary_write_meta, s_voluntary_aw).map(_ === release_state).reduce(_ || _) + io.loadStoreAXI.r.ready := Mux(grantIsCached, canAcceptCachedGrant, true.B) + val uncachedRespIdxOH = (UIntToOH(io.loadStoreAXI.r.bits.id, maxUncachedInFlight + mmioOffset) >> mmioOffset).asUInt + uncachedResp := Mux1H(uncachedRespIdxOH, uncachedReqs) + when(io.loadStoreAXI.r.fire) { + when(grantIsCached) { + grantInProgress := true.B + assert(cached_grant_wait, "A GrantData was unexpected by the dcache.") + when(d_last) { + cached_grant_wait := false.B + grantInProgress := false.B + blockProbeAfterGrantCount := (blockProbeAfterGrantCycles - 1).U + replacer.miss + } + }.otherwise { + (uncachedRespIdxOH.asBools.zip(uncachedInFlight)).foreach { + case (s, f) => + when(s && d_last) { + assert(f, "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle! + f := false.B + } + } + // r always has data + if (!cacheParams.separateUncachedResp) { + if (!cacheParams.pipelineWayMux) + s1_data_way := 1.U << nWays + s2_req.cmd := M_XRD + s2_req.size := uncachedResp.size + s2_req.signed := uncachedResp.signed + s2_req.tag := uncachedResp.tag + s2_req.addr := { + require(rowOffBits >= beatOffBits) + val dontCareBits = s1_paddr >> rowOffBits << rowOffBits + dontCareBits | uncachedResp.addr(beatOffBits - 1, 0) + } + s2_uncached_resp_addr := uncachedResp.addr + } + } + } -/** Support overriding which HellaCache is instantiated */ + io.loadStoreAXI.b.ready := true.B + when(io.loadStoreAXI.b.fire) { + assert( + release_ack_wait, + "A ReleaseAck was unexpected by the dcache." + ) // TODO should handle Ack coming back on same cycle! 
+ release_ack_wait := false.B + } -case object BuildHellaCache extends Field[BaseTile => Parameters => HellaCache](HellaCacheFactory.apply) + // Finish TileLink transaction by issuing a GrantAck + // tl_out.e.valid := tl_out.d.valid && d_first && grantIsCached && canAcceptCachedGrant + // tl_out.e.bits := edge.GrantAck(tl_out.d.bits) + // assert(tl_out.e.fire === (tl_out.d.fire && d_first && grantIsCached)) -object HellaCacheFactory { - def apply(tile: BaseTile)(p: Parameters): HellaCache = { - assert(tile.tileParams.dcache.get.nMSHRs == 0) - new DCache(tile.crossing)(p) - } -} + // data refill + // note this ready-valid signaling ignores E-channel backpressure, which + // benignly means the data RAM might occasionally be redundantly written + dataArb.io.in(1).valid := io.loadStoreAXI.r.valid && grantIsRefill && canAcceptCachedGrant + when(grantIsRefill && !dataArb.io.in(1).ready) { + // tl_out.e.valid := false.B + // tl_out.d.ready := false.B + io.loadStoreAXI.r.ready := false.B + } + if (!usingDataScratchpad) { + dataArb.io.in(1).bits.write := true.B + dataArb.io.in(1).bits.addr := + (s2_vaddr >> idxLSB) << idxLSB | + (d_refill_count << log2Ceil(parameter.loadStoreParameter.dataWidth / 8)) + dataArb.io.in(1).bits.way_en := refill_way + dataArb.io.in(1).bits.wdata := tl_d_data_encoded + dataArb.io.in(1).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) + dataArb.io.in(1).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + } else { + dataArb.io.in(1).bits := dataArb.io.in(0).bits + } + + // tag updates on refill + // ignore backpressure from metaArb, which can only be caused by tag ECC + // errors on hit-under-miss. failing to write the new tag will leave the + // line invalid, so we'll simply request the line again later. 
+// metaArb.io.in(3).valid := grantIsCached && d_done && !tl_out.d.bits.denied + metaArb.io.in(3).valid := grantIsCached && d_done + metaArb.io.in(3).bits.write := true.B + metaArb.io.in(3).bits.way_en := refill_way + metaArb.io.in(3).bits.idx := s2_vaddr(idxMSB, idxLSB) + metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(3).bits.data := tECC.encode( + L1Metadata(s2_req.addr >> tagLSB, s2_new_hit_state).asUInt + ) -/** Mix-ins for constructing tiles that have a HellaCache */ - -trait HasHellaCache { this: BaseTile => - val module: HasHellaCacheModule - implicit val p: Parameters - var nDCachePorts = 0 - lazy val dcache: HellaCache = LazyModule(p(BuildHellaCache)(this)(p)) - - tlMasterXbar.node := TLWidthWidget(tileParams.dcache.get.rowBits / 8) := dcache.node - dcache.hartIdSinkNodeOpt.map { _ := hartIdNexusNode } - dcache.mmioAddressPrefixSinkNodeOpt.map { _ := mmioAddressPrefixNexusNode } - InModuleBody { - dcache.module match { - case module: DCacheModule => module.tlb_port := DontCare - case other => other + if (!cacheParams.separateUncachedResp) { + // don't accept uncached grants if there's a structural hazard on s2_data... 
+ val blockUncachedGrant = Reg(Bool()) + blockUncachedGrant := dataArb.io.out.valid + when(grantIsUncachedData && (blockUncachedGrant || s1_valid)) { + io.loadStoreAXI.r.ready := false.B + // ...but insert bubble to guarantee grant's eventual forward progress + when(io.loadStoreAXI.r.valid) { + io.cpu.req.ready := false.B + dataArb.io.in(1).valid := true.B + dataArb.io.in(1).bits.write := false.B + blockUncachedGrant := !dataArb.io.in(1).ready + } + } } + ccover(io.loadStoreAXI.r.valid && !io.loadStoreAXI.r.ready, "BLOCK_D", "D$ D-channel blocked") + + // no probe + metaArb.io.in(6).valid := false.B + metaArb.io.in(6).bits := DontCare + + // replacement policy + s1_victim_way := (if (replacer.perSet && nWays > 1) { + val repl_array = Mem(nSets, UInt(replacer.nBits.W)) + val s1_repl_idx = s1_req.addr(idxBits + blockOffBits - 1, blockOffBits) + val s2_repl_idx = s2_vaddr(idxBits + blockOffBits - 1, blockOffBits) + val s2_repl_state = Reg(UInt(replacer.nBits.W)) + val s2_new_repl_state = replacer.get_next_state(s2_repl_state, OHToUInt(s2_hit_way)) + val s2_repl_wen = s2_valid_masked && s2_hit_way.orR && s2_repl_state =/= s2_new_repl_state + val s1_repl_state = + Mux(s2_repl_wen && s2_repl_idx === s1_repl_idx, s2_new_repl_state, repl_array(s1_repl_idx)) + when(s1_valid_not_nacked) { s2_repl_state := s1_repl_state } + + val waddr = Mux(resetting, flushCounter(idxBits - 1, 0), s2_repl_idx) + val wdata = Mux(resetting, 0.U, s2_new_repl_state) + val wen = resetting || s2_repl_wen + when(wen) { repl_array(waddr) := wdata } + + replacer.get_replace_way(s1_repl_state) + } else { + replacer.way + }) + + // release + val (_, _, releaseDone, c_count) = axiHelper(io.loadStoreAXI.w.bits, io.loadStoreAXI.w.fire) + val releaseRejected = Wire(Bool()) + val s1_release_data_valid = RegNext(dataArb.io.in(2).fire) + val s2_release_data_valid = RegNext(s1_release_data_valid && !releaseRejected) + releaseRejected := s2_release_data_valid && !io.loadStoreAXI.w.fire + val releaseDataBeat = + 
Cat(0.U, c_count) + Mux(releaseRejected, 0.U, s1_release_data_valid + Cat(0.U, s2_release_data_valid)) + val s1_release_last: Bool = RegEnable(releaseDataBeat === (refillCycles - 1).U, dataArb.io.in(2).fire) + val s2_release_last: Bool = RegEnable(s1_release_last, s1_release_data_valid && !releaseRejected) + + when(awState) { + io.loadStoreAXI.aw.valid := true.B + io.loadStoreAXI.aw.bits.addr := releaseAddress >> parameter.lgCacheBlockBytes << parameter.lgCacheBlockBytes + io.loadStoreAXI.aw.bits.len := (parameter.cacheBlockBytes * 8 / parameter.loadStoreParameter.dataWidth - 1).U + io.loadStoreAXI.aw.bits.size := parameter.lgCacheBlockBytes.U + io.loadStoreAXI.aw.bits.id := (mmioOffset - 1).U + } + + when(s2_release_data_valid) { + io.loadStoreAXI.w.valid := true.B + io.loadStoreAXI.w.bits := DontCare + io.loadStoreAXI.w.bits.data := s2_data_corrected + io.loadStoreAXI.w.bits.strb := (-1.S(io.loadStoreAXI.w.bits.strb.getWidth.W)).asUInt + io.loadStoreAXI.w.bits.last := s2_release_last + // tl_out_c.bits.corrupt := inWriteback && s2_data_error_uncorrectable + } + + val newCoh = ClientMetadata(0.U(2.W)) + releaseWay := s2_victim_or_hit_way + + if (!usingDataScratchpad) { + when(s2_victimize) { + assert(s2_valid_flush_line || s2_flush_valid || io.cpu.s2_nack) + val discard_line = s2_valid_flush_line && s2_req.size(1) || s2_flush_valid && flushing_req.size(1) + release_state := Mux( + s2_victim_dirty && !discard_line, + s_voluntary_aw, + s_voluntary_write_meta + ) + releaseAddress := Cat(s2_victim_tag, s2_req.addr(tagLSB - 1, idxLSB) << idxLSB) + } + + when(awState) { + when(io.loadStoreAXI.aw.ready) { + release_state := s_voluntary_writeback + release_ack_wait := true.B + release_ack_addr := releaseAddress + } + } + + when(release_state === s_voluntary_writeback) { + when(releaseDone) { release_state := s_voluntary_write_meta } + } + } + + dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles.U + dataArb.io.in(2).bits := dataArb.io.in(1).bits + 
dataArb.io.in(2).bits.write := false.B + dataArb.io.in(2).bits.addr := (probeIdx(releaseAddress) << blockOffBits).asUInt | (releaseDataBeat( + log2Ceil(refillCycles) - 1, + 0 + ) << rowOffBits) + dataArb.io.in(2).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) + dataArb.io.in(2).bits.eccMask := ~0.U((wordBytes / eccBytes).W) + dataArb.io.in(2).bits.way_en := ~0.U(nWays.W) + + metaArb.io.in(4).valid := release_state === s_voluntary_write_meta + metaArb.io.in(4).bits.write := true.B + metaArb.io.in(4).bits.way_en := releaseWay + metaArb.io.in(4).bits.idx := probeIdx(releaseAddress) + metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, releaseAddress(idxMSB, 0)) + metaArb.io.in(4).bits.data := tECC.encode(L1Metadata(releaseAddress >> tagLSB, newCoh).asUInt) + when(metaArb.io.in(4).fire) { release_state := s_ready } + + // cached response + (io.cpu.resp.bits: Data).waiveAll :<>= (s2_req: Data).waiveAll + io.cpu.resp.bits.has_data := s2_read + io.cpu.resp.bits.replay := false.B + io.cpu.s2_uncached := s2_uncached && !s2_hit + io.cpu.s2_paddr := s2_req.addr + io.cpu.s2_gpa := s2_tlb_xcpt.gpa + io.cpu.s2_gpa_is_pte := s2_tlb_xcpt.gpa_is_pte + + // report whether there are any outstanding accesses. disregard any + // slave-port accesses, since they don't affect local memory ordering. 
+ val s1_isSlavePortAccess = s1_req.no_xcpt + val s2_isSlavePortAccess = s2_req.no_xcpt + io.cpu.ordered := !(s1_valid && !s1_isSlavePortAccess || s2_valid && !s2_isSlavePortAccess || cached_grant_wait || uncachedInFlight.asUInt.orR) + + val s1_xcpt_valid = tlb.io.req.valid && !s1_isSlavePortAccess && !s1_nack + io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), s2_tlb_xcpt, 0.U.asTypeOf(s2_tlb_xcpt)) + + if (usingDataScratchpad) { + assert(!(s2_valid_masked && (s2_req.cmd === M_XLR || s2_req.cmd === M_XSC))) + } else { + // ccover(tl_out.b.valid && !tl_out.b.ready, "BLOCK_B", "D$ B-channel blocked") + } + + // uncached response + val s1_uncached_data_word = { + val word_idx = if(log2Ceil(rowBits / 8) == log2Ceil(wordBytes)) 0.U else uncachedResp.addr(log2Ceil(rowBits / 8) - 1, log2Ceil(wordBytes)) + val words: Seq[UInt] = grouped(io.loadStoreAXI.r.bits.data, wordBits) + Mux1H(UIntToOH(word_idx), words) + } + val s2_uncached_data_word = RegEnable(s1_uncached_data_word, io.cpu.replay_next) + val doUncachedResp = RegNext(io.cpu.replay_next) + io.cpu.resp.valid := (s2_valid_hit_pre_data_ecc || doUncachedResp) && !s2_data_error + io.cpu.replay_next := io.loadStoreAXI.r.fire && grantIsUncachedData && !cacheParams.separateUncachedResp.B + when(doUncachedResp) { + assert(!s2_valid_hit) + io.cpu.resp.bits.replay := true.B + io.cpu.resp.bits.addr := s2_uncached_resp_addr + } + + io.cpu.uncached_resp.map { resp => + resp.valid := io.loadStoreAXI.r.valid && grantIsUncachedData + resp.bits.tag := uncachedResp.tag + resp.bits.size := uncachedResp.size + resp.bits.signed := uncachedResp.signed + resp.bits.data := new LoadGen( + uncachedResp.size, + uncachedResp.signed, + uncachedResp.addr, + s1_uncached_data_word, + false.B, + wordBytes + ).data + resp.bits.data_raw := s1_uncached_data_word + when(grantIsUncachedData && !resp.ready) { + io.loadStoreAXI.r.ready := false.B + } + } + + // load data subword mux/sign extension + val s2_data_word = (0 until rowBits by wordBits).map(i => 
s2_data_uncorrected(wordBits + i - 1, i)).reduce(_ | _) + val s2_data_word_corrected = + (0 until rowBits by wordBits).map(i => s2_data_corrected(wordBits + i - 1, i)).reduce(_ | _) + val s2_data_word_possibly_uncached = + Mux(cacheParams.pipelineWayMux.B && doUncachedResp, s2_uncached_data_word, 0.U) | s2_data_word + val loadgen = new LoadGen(s2_req.size, s2_req.signed, s2_req.addr, s2_data_word_possibly_uncached, s2_sc, wordBytes) + io.cpu.resp.bits.data := loadgen.data | s2_sc_fail + io.cpu.resp.bits.data_word_bypass := loadgen.wordData + io.cpu.resp.bits.data_raw := s2_data_word + io.cpu.resp.bits.store_data := pstore1_data + + // AMOs + amoalus.map { amoalus => + amoalus.zipWithIndex.map { case(amoalu, i) => + amoalu.io.mask := pstore1_mask >> (i * (parameter.xLen / 8)) + amoalu.io.cmd := (if (usingAtomicsInCache) pstore1_cmd else M_XWR) + amoalu.io.lhs := s2_data_word >> (i * parameter.xLen) + amoalu.io.rhs := pstore1_data >> (i * parameter.xLen) + amoalu + } + pstore1_storegen_data := (if (!usingDataScratchpad) VecInit(amoalus.map(_.io.out)).asUInt + else { + val mask = FillInterleaved(8, Mux(s2_correct, 0.U, pstore1_mask)) + VecInit(amoalus.map(_.io.out_unmasked)).asUInt & mask | s2_data_word_corrected & ~mask + }) + }.getOrElse { + if (!usingAtomics) { + assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") + } + } + + // flushes + if (!usingDataScratchpad) + when(RegNext(reset.asBool)) { resetting := true.B } + val flushCounterNext = flushCounter +& 1.U + val flushDone = (flushCounterNext >> log2Ceil(nSets)) === nWays.U + val flushCounterWrap = flushCounterNext(log2Ceil(nSets) - 1, 0) + ccover( + s2_valid_masked && s2_cmd_flush_all && s2_meta_error, + "TAG_ECC_ERROR_DURING_FENCE_I", + "D$ ECC error in tag array during cache flush" + ) + ccover( + s2_valid_masked && s2_cmd_flush_all && s2_data_error, + "DATA_ECC_ERROR_DURING_FENCE_I", + "D$ ECC error in data array during cache flush" + ) + s1_flush_valid := metaArb.io + .in(5) + 
.fire && !s1_flush_valid && !s2_flush_valid_pre_tag_ecc && release_state === s_ready && !release_ack_wait + metaArb.io.in(5).valid := flushing && !flushed + metaArb.io.in(5).bits.write := false.B + metaArb.io.in(5).bits.idx := flushCounter(idxBits - 1, 0) + metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.in(5).bits.idx << blockOffBits) + metaArb.io.in(5).bits.way_en := metaArb.io.in(4).bits.way_en + metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data + + // Only flush D$ on FENCE.I if some cached executable regions are untracked. + if (supports_flush) { + when(s2_valid_masked && s2_cmd_flush_all) { + when(!flushed && !io.cpu.s2_kill && !release_ack_wait && !uncachedInFlight.asUInt.orR) { + flushing := true.B + flushing_req := s2_req + } + } + + // when(tl_out_a.fire && !s2_uncached) { flushed := false.B } + when(io.loadStoreAXI.aw.fire && !s2_uncached) { flushed := false.B } + when(flushing) { + s1_victim_way := flushCounter >> log2Ceil(nSets) + when(s2_flush_valid) { + flushCounter := flushCounterNext + when(flushDone) { + flushed := true.B + if (!isPow2(nWays)) flushCounter := flushCounterWrap + } + } + when(flushed && release_state === s_ready && !release_ack_wait) { + flushing := false.B + } + } + } + metaArb.io.in(0).valid := resetting + metaArb.io.in(0).bits := metaArb.io.in(5).bits + metaArb.io.in(0).bits.write := true.B + metaArb.io.in(0).bits.way_en := ~0.U(nWays.W) + metaArb.io.in(0).bits.data := tECC.encode(L1Metadata(0.U, ClientMetadata(0.U)).asUInt) + when(resetting) { + flushCounter := flushCounterNext + when(flushDone) { + resetting := false.B + if (!isPow2(nWays)) flushCounter := flushCounterWrap + } + } + + // gate the clock + clock_en_reg := !cacheParams.clockGate.B || + //io.ptw.customCSRs.disableDCacheClockGate || // todo: customCSRs? 
+ io.cpu.keep_clock_enabled || + metaArb.io.out.valid || // subsumes resetting || flushing + //s1Release || s2_release || + s1_valid || s2_valid || + // tlb_port.req.valid || + // s1_tlb_req_valid || s2_tlb_req_valid || + pstore1_held || pstore2_valid || + release_state =/= s_ready || + release_ack_wait || !release_queue_empty || + !tlb.io.req.ready || + cached_grant_wait || uncachedInFlight.asUInt.orR || + lrscCount > 0.U || blockProbeAfterGrantCount > 0.U + + // performance events + io.cpu.perf.acquire := io.loadStoreAXI.ar.fire + io.cpu.perf.release := releaseDone + io.cpu.perf.grant := d_done + io.cpu.perf.tlbMiss := io.ptw.req.fire + io.cpu.perf.storeBufferEmptyAfterLoad := !((s1_valid && s1_write) || + ((s2_valid && s2_write && !s2_waw_hazard) || pstore1_held) || + pstore2_valid) + io.cpu.perf.storeBufferEmptyAfterStore := !((s1_valid && s1_write) || + (s2_valid && s2_write && pstore1_rmw) || + ((s2_valid && s2_write && !s2_waw_hazard || pstore1_held) && pstore2_valid)) + io.cpu.perf.canAcceptStoreThenLoad := !(((s2_valid && s2_write && pstore1_rmw) && (s1_valid && s1_write && !s1_waw_hazard)) || + (pstore2_valid && pstore1_valid_likely && (s1_valid && s1_write))) + io.cpu.perf.canAcceptStoreThenRMW := io.cpu.perf.canAcceptStoreThenLoad && !pstore2_valid + io.cpu.perf.canAcceptLoadThenLoad := !((s1_valid && s1_write && needsRead( + s1_req + )) && ((s2_valid && s2_write && !s2_waw_hazard || pstore1_held) || pstore2_valid)) + io.cpu.perf.blocked := { + // stop reporting blocked just before unblocking to avoid overly conservative stalling + /*val beatsBeforeEnd = outer.crossing match { + case SynchronousCrossing(_) => 2 + case RationalCrossing(_) => 1 // assumes 1 < ratio <= 2; need more bookkeeping for optimal handling of >2 + case _: AsynchronousCrossing => 1 // likewise + case _: CreditedCrossing => 1 // likewise + } + val near_end_of_refill = + if (cacheBlockBytes / beatBytes <= beatsBeforeEnd) io.loadStoreAXI.r.valid + else { + val refill_count = 
RegInit(0.U((cacheBlockBytes / beatBytes).log2.W)) + when(io.loadStoreAXI.r.fire && grantIsRefill) { refill_count := refill_count + 1.U } + refill_count >= (cacheBlockBytes / beatBytes - beatsBeforeEnd).U + } + cached_grant_wait && !near_end_of_refill*/ + false.B // todo: axi grant wait? + } + + // report errors + val (data_error, data_error_uncorrectable, data_error_addr) = + if (usingDataScratchpad) (s2_valid_data_error, s2_data_error_uncorrectable, s2_req.addr) + else { + ( + RegNext(io.loadStoreAXI.w.fire && inWriteback && s2_data_error), + RegNext(s2_data_error_uncorrectable), + releaseAddress + ) // This is stable for a cycle after tl_out_c.fire, so don't need a register + } + { + val error_addr = + Mux( + metaArb.io.in(1).valid, + Cat(s2_first_meta_corrected.tag, metaArb.io.in(1).bits.addr(tagLSB - 1, idxLSB)), + data_error_addr >> idxLSB + ) << idxLSB + io.errors.uncorrectable.foreach { u => + u.valid := metaArb.io.in(1).valid && s2_meta_error_uncorrectable || data_error && data_error_uncorrectable + u.bits := error_addr + } + io.errors.correctable.foreach { c => + c.valid := metaArb.io.in(1).valid || data_error + c.bits := error_addr + io.errors.uncorrectable.foreach { u => when(u.valid) { c.valid := false.B } } + } + // io.errors.bus.valid := tl_out.d.fire && (tl_out.d.bits.denied || tl_out.d.bits.corrupt) + io.errors.bus.valid := false.B + io.errors.bus.bits := Mux(grantIsCached, s2_req.addr >> idxLSB << idxLSB, 0.U) + + ccoverNotScratchpad(io.errors.bus.valid && grantIsCached, "D_ERROR_CACHED", "D$ D-channel error, cached") + ccover(io.errors.bus.valid && !grantIsCached, "D_ERROR_UNCACHED", "D$ D-channel error, uncached") + } +// +// if (usingDataScratchpad) { +// val data_error_cover = Seq( +// property.CoverBoolean(!data_error, Seq("no_data_error")), +// property.CoverBoolean(data_error && !data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(data_error && data_error_uncorrectable, Seq("data_uncorrectable_error")) +// ) 
+// val request_source = Seq( +// property.CoverBoolean(s2_isSlavePortAccess, Seq("from_TL")), +// property.CoverBoolean(!s2_isSlavePortAccess, Seq("from_CPU")) +// ) +// +// property.cover( +// new property.CrossProperty( +// Seq(data_error_cover, request_source), +// Seq(), +// "MemorySystem;;Scratchpad Memory Bit Flip Cross Covers" +// ) +// ) +// } else { +// +// val data_error_type = Seq( +// property.CoverBoolean(!s2_valid_data_error, Seq("no_data_error")), +// property.CoverBoolean(s2_valid_data_error && !s2_data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(s2_valid_data_error && s2_data_error_uncorrectable, Seq("data_uncorrectable_error")) +// ) +// val data_error_dirty = Seq( +// property.CoverBoolean(!s2_victim_dirty, Seq("data_clean")), +// property.CoverBoolean(s2_victim_dirty, Seq("data_dirty")) +// ) +// val request_source = if (supports_flush) { +// Seq(property.CoverBoolean(!flushing, Seq("access")), property.CoverBoolean(flushing, Seq("during_flush"))) +// } else { +// Seq(property.CoverBoolean(true.B, Seq("never_flush"))) +// } +// val tag_error_cover = Seq( +// property.CoverBoolean(!s2_meta_error, Seq("no_tag_error")), +// property.CoverBoolean(s2_meta_error && !s2_meta_error_uncorrectable, Seq("tag_correctable_error")), +// property.CoverBoolean(s2_meta_error && s2_meta_error_uncorrectable, Seq("tag_uncorrectable_error")) +// ) +// property.cover( +// new property.CrossProperty( +// Seq(data_error_type, data_error_dirty, request_source, tag_error_cover), +// Seq(), +// "MemorySystem;;Cache Memory Bit Flip Cross Covers" +// ) +// ) +// } + + } // leaving gated-clock domain + val dcacheImpl = withClock(gated_clock) { new DCacheModuleImpl } + + def encodeData(x: UInt, poison: Bool) = + VecInit(grouped(x, eccBits).map(dECC.encode(_, if (dECC.canDetect) poison else false.B))).asUInt + def dummyEncodeData(x: UInt) = VecInit(grouped(x, eccBits).map(dECC.swizzle)).asUInt + def decodeData(x: UInt) = grouped(x, 
dECC.width(eccBits)).map(dECC.decode) + def eccMask(byteMask: UInt) = VecInit(grouped(byteMask, eccBytes).map(_.orR)).asUInt + def eccByteMask(byteMask: UInt) = FillInterleaved(eccBytes, eccMask(byteMask)) + + def likelyNeedsRead(req: HellaCacheReq): Bool = { + // req.cmd.isOneOf(M_XWR, M_PFW) + val res = !Seq(M_XWR, M_PFW).map(_ === req.cmd).reduce(_ ||_) || req.size < log2Ceil(eccBytes).U + assert(!needsRead(req) || res) + res } -} -trait HasHellaCacheModule { - val outer: HasHellaCache with HasTileParameters - implicit val p: Parameters - val dcachePorts = ListBuffer[HellaCacheIO]() - val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.p)) - outer.dcache.module.io.cpu <> dcacheArb.io.mem -} + def isRead(cmd: UInt) = Seq(M_XRD, M_XLR, M_XSC).map(_ === cmd).reduce(_ || _) + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC + def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR -/** Metadata array used for all HellaCaches */ + def needsRead(req: HellaCacheReq) = + isRead(req.cmd) || + (isWrite(req.cmd) && (req.cmd === M_PWR || req.size < log2Ceil(eccBytes).U)) -class L1Metadata(implicit p: Parameters) extends L1HellaCacheBundle()(p) { - val coh = new ClientMetadata - val tag = UInt(tagBits.W) + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = {} + def ccoverNotScratchpad(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = {} + + require( + !usingVM || tagLSB <= pgIdxBits, + s"D$$ set size must not exceed ${1 << (pgIdxBits - 10)} KiB; got ${(nSets * cacheBlockBytes) >> 10} KiB" + ) + def tagLSB: Int = untagBits + def probeIdx(b: UInt): UInt = b(idxMSB, idxLSB) } -object L1Metadata { - def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { - val meta = Wire(new L1Metadata) - meta.tag := tag - meta.coh := coh - meta +class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) { + val size = 
Wire(UInt(log2Ceil(log2Ceil(maxSize) + 1).W)) + size := typ + def misaligned: Bool = + (addr & ((1.U << size) - 1.U)(log2Ceil(maxSize) - 1, 0)).orR + + def mask = { + var res = 1.U + for (i <- 0 until log2Ceil(maxSize)) { + val upper = Mux(addr(i), res, 0.U) | Mux(size >= (i + 1).U, ((BigInt(1) << (1 << i)) - 1).U, 0.U) + val lower = Mux(addr(i), 0.U, res) + res = Cat(upper, lower) + } + res } -} -class L1MetaReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { - val idx = UInt(idxBits.W) - val way_en = UInt(nWays.W) - val tag = UInt(tagBits.W) -} + protected def genData(i: Int): UInt = + if (i >= log2Ceil(maxSize)) dat + else Mux(size === i.U, Fill(1 << (log2Ceil(maxSize) - i), dat((8 << i) - 1, 0)), genData(i + 1)) -class L1MetaWriteReq(implicit p: Parameters) extends L1MetaReadReq()(p) { - val data = new L1Metadata + def data = genData(0) + def wordData = genData(2) } -class L1MetadataArray[T <: L1Metadata](onReset: () => T)(implicit p: Parameters) extends L1HellaCacheModule()(p) { - val rstVal = onReset() - val io = IO(new Bundle { - val read = Flipped(Decoupled(new L1MetaReadReq)) - val write = Flipped(Decoupled(new L1MetaWriteReq)) - val resp = Output(Vec(nWays, rstVal.cloneType)) - }) - - val rst_cnt = RegInit(0.U(log2Up(nSets + 1).W)) - val rst = rst_cnt < nSets.U - val waddr = Mux(rst, rst_cnt, io.write.bits.idx) - val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt - val wmask = Mux(rst || (nWays == 1).B, (-1).S, io.write.bits.way_en.asSInt).asBools - val rmask = Mux(rst || (nWays == 1).B, (-1).S, io.read.bits.way_en.asSInt).asBools - when(rst) { rst_cnt := rst_cnt + 1.U } - - val metabits = rstVal.getWidth - val tag_array = SyncReadMem(nSets, Vec(nWays, UInt(metabits.W))) - val wen = rst || io.write.valid - when(wen) { - tag_array.write(waddr, VecInit.fill(nWays)(wdata), wmask) +class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) { + private val size = new StoreGen(typ, addr, dat, maxSize).size + + 
private def genData(logMinSize: Int): UInt = { + var res = dat + for (i <- log2Ceil(maxSize) - 1 to logMinSize by -1) { + val pos = 8 << i + val shifted = Mux(addr(i), res(2 * pos - 1, pos), res(pos - 1, 0)) + val doZero = (i == 0).B && zero + val zeroed = Mux(doZero, 0.U, shifted) + res = Cat( + Mux(size === i.U || doZero, Fill(8 * maxSize - pos, signed && zeroed(pos - 1)), res(8 * maxSize - 1, pos)), + zeroed + ) + } + res } - io.resp := tag_array.read(io.read.bits.idx, io.read.fire).map(_.asTypeOf(chiselTypeOf(rstVal))) - io.read.ready := !wen // so really this could be a 6T RAM - io.write.ready := !rst + def wordData = genData(2) + def data = genData(0) } diff --git a/rocketv/src/HellaCacheArbiter.scala b/rocketv/src/HellaCacheArbiter.scala index f4d60eeca..5862b887d 100644 --- a/rocketv/src/HellaCacheArbiter.scala +++ b/rocketv/src/HellaCacheArbiter.scala @@ -1,14 +1,118 @@ -// See LICENSE.Berkeley for license details. -// See LICENSE.SiFive for license details. +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv +// TODO: inline and remove this Module import chisel3._ -import chisel3.util.{Cat, log2Up} +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.{Cat, log2Ceil} -class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { - val io = IO(new Bundle { - val requestor = Flipped(Vec(n, new HellaCacheIO)) - val mem = new HellaCacheIO - }) +case class HellaCacheArbiterParameter(useAsyncReset: Boolean, + xLen: Int, + fLen: Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) extends SerializableModuleParameter { + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + + def 
blockOffBits: Int = lgCacheBlockBytes + + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + + def idxBits: Int = log2Ceil(dcacheNSets) + + def dcacheArbPorts: Int = 2 + + def untagBits: Int = blockOffBits + idxBits + + def pgIdxBits: Int = 12 + + def coreDataBits: Int = xLen max fLen + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def coreDataBytes: Int = coreDataBits / 8 + + def vpnBits: Int = vaddrBits - pgIdxBits + + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + + def maxHypervisorExtraAddrBits: Int = 2 + + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + (paddrBits + 1) min xLen + } + + // static for now + def dcacheReqTagBits: Int = 6 + + def usingHypervisor = false +} + +class HellaCacheArbiterInterface(parameter: HellaCacheArbiterParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val requestor = Flipped(Vec(parameter.dcacheArbPorts, new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ))) + val mem = new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + 
parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) +} + +@instantiable +class HellaCacheArbiter(val parameter: HellaCacheArbiterParameter) + extends FixedIORawModule(new HellaCacheArbiterInterface(parameter)) + with SerializableModule[HellaCacheArbiterParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + val n = parameter.dcacheArbPorts if (n == 1) { io.mem <> io.requestor.head @@ -27,7 +131,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { val req = io.requestor(i).req def connect_s0() = { io.mem.req.bits := req.bits - io.mem.req.bits.tag := Cat(req.bits.tag, i.U(log2Up(n).W)) + io.mem.req.bits.tag := Cat(req.bits.tag, i.U(log2Ceil(n).W)) s1_id := i.U } def connect_s1() = { @@ -53,7 +157,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { for (i <- 0 until n) { val resp = io.requestor(i).resp - val tag_hit = io.mem.resp.bits.tag(log2Up(n) - 1, 0) === i.U + val tag_hit = io.mem.resp.bits.tag(log2Ceil(n) - 1, 0) === i.U resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).s2_xcpt := io.mem.s2_xcpt io.requestor(i).s2_gpa := io.mem.s2_gpa @@ -66,18 +170,18 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { io.requestor(i).s2_paddr := io.mem.s2_paddr io.requestor(i).clock_enabled := io.mem.clock_enabled resp.bits := io.mem.resp.bits - resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) + resp.bits.tag := io.mem.resp.bits.tag >> log2Ceil(n) io.requestor(i).replay_next := io.mem.replay_next - io.requestor(i).uncached_resp.map { uncached_resp => - val uncached_tag_hit = io.mem.uncached_resp.get.bits.tag(log2Up(n) - 1, 0) === i.U + io.requestor(i).uncached_resp.foreach { uncached_resp => + val uncached_tag_hit = 
io.mem.uncached_resp.get.bits.tag(log2Ceil(n) - 1, 0) === i.U uncached_resp.valid := io.mem.uncached_resp.get.valid && uncached_tag_hit when(uncached_resp.ready && uncached_tag_hit) { io.mem.uncached_resp.get.ready := true.B } uncached_resp.bits := io.mem.uncached_resp.get.bits - uncached_resp.bits.tag := io.mem.uncached_resp.get.bits.tag >> log2Up(n) + uncached_resp.bits.tag := io.mem.uncached_resp.get.bits.tag >> log2Ceil(n) } } } From cf4ee99c72d12d3886d8b616bd670f3737356cc3 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 1 Jul 2024 22:02:51 +0800 Subject: [PATCH 072/140] [rocketv] add elaborator for DCache - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.DCache config --xLen=64 --fLen=64 --vaddrBitsExtended=1 --vaddrBits=64 --paddrBits=64 - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.DCache design --parameter ./DCache.json --run-firtool --- elaborator/src/rocketv/DCache.scala | 106 ++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 elaborator/src/rocketv/DCache.scala diff --git a/elaborator/src/rocketv/DCache.scala b/elaborator/src/rocketv/DCache.scala new file mode 100644 index 000000000..f791bdfea --- /dev/null +++ b/elaborator/src/rocketv/DCache.scala @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{HellaCache, HellaCacheParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object DCache extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = 
BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class DCacheParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "fLen") fLen: Int, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nWays") nWays: Int, + @arg(name = "nSets") nSets: Int, + @arg(name = "rowBits") rowBits: Int, + @arg(name = "nTLBSets") nTLBSets: Int, + @arg(name = "nTLBWays") nTLBWays: Int, + @arg(name = "tagECC") tagECC: Option[String], + @arg(name = "dataECC") dataECC: Option[String], + @arg(name = "maxUncachedInFlight") maxUncachedInFlight: Int, + @arg(name = "separateUncachedResp") separateUncachedResp: Boolean, + @arg(name = "legal") legal: BitSet, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "read") read: BitSet, + @arg(name = "write") write: BitSet, + @arg(name = "putPartial") putPartial: BitSet, + @arg(name = "logic") logic: BitSet, + @arg(name = "arithmetic") arithmetic: BitSet, + @arg(name = "exec") exec: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet) { + def convert: HellaCacheParameter = HellaCacheParameter( + useAsyncReset, + clockGate, + xLen, + fLen, + usingVM, + paddrBits, + cacheBlockBytes, + nWays, + nSets, + rowBits, + nTLBSets, + nTLBWays, + tagECC, + dataECC, + maxUncachedInFlight, + separateUncachedResp, + legal, + cacheable, + read, + write, + putPartial, + logic, + arithmetic, + exec, + sideEffects + ) + } + + implicit def DCacheParameterMainParser: ParserForClass[DCacheParameterMain] = 
ParserForClass[DCacheParameterMain] + + @main + def config(@arg(name = "parameter") parameter: DCacheParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[HellaCache, HellaCacheParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 2389515630a1fd1a26746b2233852e2253c43f4f Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 17:04:21 +0800 Subject: [PATCH 073/140] [rocketv] copy Frontend into rocketv project --- rocketv/src/Frontend.scala | 402 +++++++++++++++++++++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 rocketv/src/Frontend.scala diff --git a/rocketv/src/Frontend.scala b/rocketv/src/Frontend.scala new file mode 100644 index 000000000..30297c503 --- /dev/null +++ b/rocketv/src/Frontend.scala @@ -0,0 +1,402 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package freechips.rocketchip.rocket + +import chisel3._ +import chisel3.util._ +import chisel3.{withClock,withReset} +import chisel3.experimental.SourceInfo +import org.chipsalliance.cde.config._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.tilelink.{TLWidthWidget} +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property + +class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { + val pc = UInt(vaddrBitsExtended.W) + val speculative = Bool() +} + +class FrontendExceptions extends Bundle { + val pf = new Bundle { + val inst = Bool() + } + val gf = new Bundle { + val inst = Bool() + } + val ae = new Bundle { + val inst = Bool() + } +} + +class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { + val btb = new BTBResp + val pc = UInt(vaddrBitsExtended.W) // ID stage PC + val data = UInt((fetchWidth * coreInstBits).W) + val mask = Bits(fetchWidth.W) + val xcpt = new FrontendExceptions + val replay = Bool() +} + +class FrontendPerfEvents extends Bundle { + val acquire = Bool() + val tlbMiss = Bool() +} + +class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { + val might_request = Output(Bool()) + val clock_enabled = Input(Bool()) + val req = Valid(new FrontendReq) + val sfence = Valid(new SFenceReq) + val resp = Flipped(Decoupled(new FrontendResp)) + val gpa = Flipped(Valid(UInt(vaddrBitsExtended.W))) + val btb_update = Valid(new BTBUpdate) + val bht_update = Valid(new BHTUpdate) + val ras_update = Valid(new RASUpdate) + val flush_icache = Output(Bool()) + val npc = Input(UInt(vaddrBitsExtended.W)) + val perf = Input(new FrontendPerfEvents()) + val progress = Output(Bool()) +} + +class Frontend(val icacheParams: ICacheParams, tileId: Int)(implicit p: Parameters) extends LazyModule { + lazy val module = new FrontendModule(this) + val icache = LazyModule(new ICache(icacheParams, tileId)) + val masterNode = icache.masterNode + val slaveNode = 
icache.slaveNode + val resetVectorSinkNode = BundleBridgeSink[UInt](Some(() => UInt(masterNode.edges.out.head.bundle.addressBits.W))) +} + +class FrontendBundle(val outer: Frontend) extends CoreBundle()(outer.p) { + val cpu = Flipped(new FrontendIO()) + val ptw = new TLBPTWIO() + val errors = new ICacheErrors +} + +class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) + with HasRocketCoreParameters + with HasL1ICacheParameters { + val io = IO(new FrontendBundle(outer)) + val io_reset_vector = outer.resetVectorSinkNode.bundle + implicit val edge = outer.masterNode.edges.out(0) + val icache = outer.icache.module + require(fetchWidth*coreInstBytes == outer.icacheParams.fetchBytes) + + val fq = withReset(reset.asBool || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) } + + val clock_en_reg = Reg(Bool()) + val clock_en = clock_en_reg || io.cpu.might_request + io.cpu.clock_enabled := clock_en + assert(!(io.cpu.req.valid || io.cpu.sfence.valid || io.cpu.flush_icache || io.cpu.bht_update.valid || io.cpu.btb_update.valid) || io.cpu.might_request) + val gated_clock = + if (!rocketParams.clockGate) clock + else ClockGate(clock, clock_en, "icache_clock_gate") + + icache.clock := gated_clock + icache.io.clock_enabled := clock_en + withClock (gated_clock) { // entering gated-clock domain + + val tlb = Module(new TLB(true, log2Ceil(fetchBytes), TLBConfig(nTLBSets, nTLBWays, outer.icacheParams.nTLBBasePageSectors, outer.icacheParams.nTLBSuperpages))) + + val s1_valid = Reg(Bool()) + val s2_valid = RegInit(false.B) + val s0_fq_has_space = + !fq.io.mask(fq.io.mask.getWidth-3) || + (!fq.io.mask(fq.io.mask.getWidth-2) && (!s1_valid || !s2_valid)) || + (!fq.io.mask(fq.io.mask.getWidth-1) && (!s1_valid && !s2_valid)) + val s0_valid = io.cpu.req.valid || s0_fq_has_space + s1_valid := s0_valid + val s1_pc = Reg(UInt(vaddrBitsExtended.W)) + val s1_speculative = Reg(Bool()) + val s2_pc = RegInit(t = UInt(vaddrBitsExtended.W), 
alignPC(io_reset_vector)) + val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B + val s2_btb_resp_bits = Reg(new BTBResp) + val s2_btb_taken = s2_btb_resp_valid && s2_btb_resp_bits.taken + val s2_tlb_resp = Reg(tlb.io.resp.cloneType) + val s2_xcpt = s2_tlb_resp.ae.inst || s2_tlb_resp.pf.inst || s2_tlb_resp.gf.inst + val s2_speculative = RegInit(false.B) + val s2_partial_insn_valid = RegInit(false.B) + val s2_partial_insn = Reg(UInt(coreInstBits.W)) + val wrong_path = RegInit(false.B) + + val s1_base_pc = ~(~s1_pc | (fetchBytes - 1).U) + val ntpc = s1_base_pc + fetchBytes.U + val predicted_npc = WireDefault(ntpc) + val predicted_taken = WireDefault(false.B) + + val s2_replay = Wire(Bool()) + s2_replay := (s2_valid && !fq.io.enq.fire) || RegNext(s2_replay && !s0_valid, true.B) + val npc = Mux(s2_replay, s2_pc, predicted_npc) + + s1_pc := io.cpu.npc + // consider RVC fetches across blocks to be non-speculative if the first + // part was non-speculative + val s0_speculative = + if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken + else true.B + s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative)) + + val s2_redirect = WireDefault(io.cpu.req.valid) + s2_valid := false.B + when (!s2_replay) { + s2_valid := !s2_redirect + s2_pc := s1_pc + s2_speculative := s1_speculative + s2_tlb_resp := tlb.io.resp + } + + val recent_progress_counter_init = 3.U + val recent_progress_counter = RegInit(recent_progress_counter_init) + val recent_progress = recent_progress_counter > 0.U + when(io.ptw.req.fire && recent_progress) { recent_progress_counter := recent_progress_counter - 1.U } + when(io.cpu.progress) { recent_progress_counter := recent_progress_counter_init } + + val s2_kill_speculative_tlb_refill = s2_speculative && !recent_progress + + io.ptw <> tlb.io.ptw + tlb.io.req.valid := s1_valid && !s2_replay + tlb.io.req.bits.cmd := M_XRD // Frontend only reads + 
tlb.io.req.bits.vaddr := s1_pc + tlb.io.req.bits.passthrough := false.B + tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth).U + tlb.io.req.bits.prv := io.ptw.status.prv + tlb.io.req.bits.v := io.ptw.status.v + tlb.io.sfence := io.cpu.sfence + tlb.io.kill := !s2_valid || s2_kill_speculative_tlb_refill + + icache.io.req.valid := s0_valid + icache.io.req.bits.addr := io.cpu.npc + icache.io.invalidate := io.cpu.flush_icache + icache.io.s1_paddr := tlb.io.resp.paddr + icache.io.s2_vaddr := s2_pc + icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay + val s2_can_speculatively_refill = s2_tlb_resp.cacheable && !io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill + icache.io.s2_kill := s2_speculative && !s2_can_speculatively_refill || s2_xcpt + icache.io.s2_cacheable := s2_tlb_resp.cacheable + icache.io.s2_prefetch := s2_tlb_resp.prefetchable && !io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableICachePrefetch + + fq.io.enq.valid := RegNext(s1_valid) && s2_valid && (icache.io.resp.valid || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) || (!s2_tlb_resp.miss && icache.io.s2_kill)) + fq.io.enq.bits.pc := s2_pc + io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)) + + fq.io.enq.bits.data := icache.io.resp.bits.data + fq.io.enq.bits.mask := ((1 << fetchWidth)-1).U << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes)) + fq.io.enq.bits.replay := (icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt) || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) + fq.io.enq.bits.btb := s2_btb_resp_bits + fq.io.enq.bits.btb.taken := s2_btb_taken + fq.io.enq.bits.xcpt := s2_tlb_resp + assert(!(s2_speculative && io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill && !icache.io.s2_kill)) + when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true.B } + + if (usingBTB) { + val btb = 
Module(new BTB) + btb.io.flush := false.B + btb.io.req.valid := false.B + btb.io.req.bits.addr := s1_pc + btb.io.btb_update := io.cpu.btb_update + btb.io.bht_update := io.cpu.bht_update + btb.io.ras_update.valid := false.B + btb.io.ras_update.bits := DontCare + btb.io.bht_advance.valid := false.B + btb.io.bht_advance.bits := DontCare + when (!s2_replay) { + btb.io.req.valid := !s2_redirect + s2_btb_resp_valid := btb.io.resp.valid + s2_btb_resp_bits := btb.io.resp.bits + } + when (btb.io.resp.valid && btb.io.resp.bits.taken) { + predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended) + predicted_taken := true.B + } + + val force_taken = io.ptw.customCSRs.bpmStatic + when (io.ptw.customCSRs.flushBTB) { btb.io.flush := true.B } + when (force_taken) { btb.io.bht_update.valid := false.B } + + val s2_base_pc = ~(~s2_pc | (fetchBytes-1).U) + val taken_idx = Wire(UInt()) + val after_idx = Wire(UInt()) + val useRAS = WireDefault(false.B) + val updateBTB = WireDefault(false.B) + + // If !prevTaken, ras_update / bht_update is always invalid. 
+ taken_idx := DontCare + after_idx := DontCare + + def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { + def insnIsRVC(bits: UInt) = bits(1,0) =/= 3.U + val prevRVI = prevValid && !insnIsRVC(prevBits) + val valid = fq.io.enq.bits.mask(idx) && !prevRVI + val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx) + val rvc = insnIsRVC(bits) + val rviBits = Cat(bits, prevBits) + val rviBranch = rviBits(6,0) === Instructions.BEQ.value.U.extract(6,0) + val rviJump = rviBits(6,0) === Instructions.JAL.value.U.extract(6,0) + val rviJALR = rviBits(6,0) === Instructions.JALR.value.U.extract(6,0) + val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15) + val rviCall = (rviJALR || rviJump) && rviBits(7) + val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ + val rvcJAL = (xLen == 32).B && bits === Instructions32.C_JAL + val rvcJump = bits === Instructions.C_J || rvcJAL + val rvcImm = Mux(bits(14), new RVCDecoder(bits, xLen).bImm.asSInt, new RVCDecoder(bits, xLen).jImm.asSInt) + val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0.U + val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7) + val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0.U + val rvcCall = rvcJAL || rvcJALR + val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits)) + val predict_taken = s2_btb_resp_bits.bht.taken || force_taken + val taken = + prevRVI && (rviJump || rviJALR || rviBranch && predict_taken) || + valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && predict_taken) + val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn) + val predictJump = prevRVI && rviJump || valid && rvcJump + val predictBranch = predict_taken && (prevRVI && rviBranch || valid && rvcBranch) + + when (s2_valid && s2_btb_resp_valid && s2_btb_resp_bits.bridx === idx.U && valid && !rvc) { + // The BTB has predicted that the middle of an RVI instruction is + // a 
branch! Flush the BTB and the pipeline. + btb.io.flush := true.B + fq.io.enq.bits.replay := true.B + wrong_path := true.B + ccover(wrong_path, "BTB_NON_CFI_ON_WRONG_PATH", "BTB predicted a non-branch was taken while on the wrong path") + } + + when (!prevTaken) { + taken_idx := idx.U + after_idx := (idx + 1).U + btb.io.ras_update.valid := fq.io.enq.fire && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn)) + btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, + Mux(Mux(prevRVI, rviCall, rvcCall), CFIType.call, + Mux(Mux(prevRVI, rviBranch, rvcBranch) && !force_taken, CFIType.branch, + CFIType.jump))) + + when (!s2_btb_taken) { + when (fq.io.enq.fire && taken && !predictBranch && !predictJump && !predictReturn) { + wrong_path := true.B + } + when (s2_valid && predictReturn) { + useRAS := true.B + } + when (s2_valid && (predictBranch || predictJump)) { + val pc = s2_base_pc | (idx*coreInstBytes).U + val npc = + if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) + else Mux(prevRVI, pc - coreInstBytes.U, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) + predicted_npc := npc.asUInt + } + } + when (prevRVI && rviBranch || valid && rvcBranch) { + btb.io.bht_advance.valid := fq.io.enq.fire && !wrong_path + btb.io.bht_advance.bits := s2_btb_resp_bits + } + when (!s2_btb_resp_valid && (predictBranch && s2_btb_resp_bits.bht.strongly_taken || predictJump || predictReturn)) { + updateBTB := true.B + } + } + + if (idx == fetchWidth-1) { + when (fq.io.enq.fire) { + s2_partial_insn_valid := false.B + when (valid && !prevTaken && !rvc) { + s2_partial_insn_valid := true.B + s2_partial_insn := bits | 0x3.U + } + } + prevTaken || taken + } else { + scanInsns(idx + 1, valid, bits, prevTaken || taken) + } + } + + when (!io.cpu.btb_update.valid) { + val fetch_bubble_likely = !fq.io.mask(1) + btb.io.btb_update.valid := fq.io.enq.fire && !wrong_path && fetch_bubble_likely && updateBTB + 
btb.io.btb_update.bits.prediction.entry := tileParams.btb.get.nEntries.U + btb.io.btb_update.bits.isValid := true.B + btb.io.btb_update.bits.cfiType := btb.io.ras_update.bits.cfiType + btb.io.btb_update.bits.br_pc := s2_base_pc | (taken_idx << log2Ceil(coreInstBytes)) + btb.io.btb_update.bits.pc := s2_base_pc + } + + btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) + + val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) + when (useRAS) { + predicted_npc := btb.io.ras_head.bits + } + when (fq.io.enq.fire && (s2_btb_taken || taken)) { + s2_partial_insn_valid := false.B + } + when (!s2_btb_taken) { + when (taken) { + fq.io.enq.bits.btb.bridx := taken_idx + fq.io.enq.bits.btb.taken := true.B + fq.io.enq.bits.btb.entry := tileParams.btb.get.nEntries.U + when (fq.io.enq.fire) { s2_redirect := true.B } + } + } + + assert(!s2_partial_insn_valid || fq.io.enq.bits.mask(0)) + when (s2_redirect) { s2_partial_insn_valid := false.B } + when (io.cpu.req.valid) { wrong_path := false.B } + } + + io.cpu.resp <> fq.io.deq + + // supply guest physical address to commit stage + val gpa_valid = Reg(Bool()) + val gpa = Reg(UInt(vaddrBitsExtended.W)) + when (fq.io.enq.fire && s2_tlb_resp.gf.inst) { + when (!gpa_valid) { + gpa := s2_tlb_resp.gpa + } + gpa_valid := true.B + } + when (io.cpu.req.valid) { + gpa_valid := false.B + } + io.cpu.gpa.valid := gpa_valid + io.cpu.gpa.bits := gpa + + // performance events + io.cpu.perf.acquire := icache.io.perf.acquire + io.cpu.perf.tlbMiss := io.ptw.req.fire + io.errors := icache.io.errors + + // gate the clock + clock_en_reg := !rocketParams.clockGate.B || + io.cpu.might_request || // chicken bit + icache.io.keep_clock_enabled || // I$ miss or ITIM access + s1_valid || s2_valid || // some fetch in flight + !tlb.io.req.ready || // handling TLB miss + !fq.io.mask(fq.io.mask.getWidth-1) // queue not full + } // leaving gated-clock domain + + def alignPC(pc: UInt) = ~(~pc | (coreInstBytes - 
1).U) + + def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = + property.cover(cond, s"FRONTEND_$label", "Rocket;;" + desc) +} + +/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ +trait HasICacheFrontend extends CanHavePTW { this: BaseTile => + val module: HasICacheFrontendModule + val frontend = LazyModule(new Frontend(tileParams.icache.get, tileId)) + tlMasterXbar.node := TLWidthWidget(tileParams.icache.get.rowBits/8) := frontend.masterNode + connectTLSlave(frontend.slaveNode, tileParams.core.fetchBytes) + frontend.icache.hartIdSinkNodeOpt.foreach { _ := hartIdNexusNode } + frontend.icache.mmioAddressPrefixSinkNodeOpt.foreach { _ := mmioAddressPrefixNexusNode } + frontend.resetVectorSinkNode := resetVectorNexusNode + nPTWPorts += 1 + + // This should be a None in the case of not having an ITIM address, when we + // don't actually use the device that is instantiated in the frontend. + private val deviceOpt = if (tileParams.icache.get.itimAddr.isDefined) Some(frontend.icache.device) else None +} + +trait HasICacheFrontendModule extends CanHavePTWModule { + val outer: HasICacheFrontend + ptwPorts += outer.frontend.module.io.ptw +} From 6178e8ef520a846b67ec25e01e386d53f0a9b50e Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Mon, 1 Jul 2024 19:05:37 +0800 Subject: [PATCH 074/140] [rocketv] migrate Frontend --- rocketv/src/Bundle.scala | 40 +- rocketv/src/FetchQueue.scala | 88 ++++ rocketv/src/Frontend.scala | 930 +++++++++++++++++++++-------------- rocketv/src/ImmGen.scala | 37 ++ 4 files changed, 736 insertions(+), 359 deletions(-) create mode 100644 rocketv/src/FetchQueue.scala create mode 100644 rocketv/src/ImmGen.scala diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index c60156caf..f6da32b86 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -169,6 +169,7 @@ class BTBResp( object BHTResp { def taken(bht: BHTResp): Bool = bht.value(0) + def 
strongly_taken(bhtResp: BHTResp): Bool = bhtResp.value === 1.U } class BHTResp(bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) extends Bundle { @@ -543,10 +544,10 @@ class FrontendResp( bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], vaddrBitsExtended: Int, - coreInstBits: Int) + coreInstBits: Int, + fetchWidth: Int) extends Bundle { - def fetchWidth = 1 - val btb = new BTBResp(vaddrBits, entries, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) + val btb = new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int]) val pc = UInt(vaddrBitsExtended.W) // ID stage PC val data = UInt((fetchWidth * coreInstBits).W) val mask = UInt(fetchWidth.W) @@ -1365,3 +1366,36 @@ class DCacheDataReq(untagBits: Int, encBits: Int, rowBytes: Int, eccBytes: Int, val eccMask: UInt = UInt((wordBytes / eccBytes).W) val way_en: UInt = UInt(nWays.W) } + +class FrontendReq(vaddrBitsExtended: Int) extends Bundle { + val pc = UInt(vaddrBitsExtended.W) + val speculative = Bool() +} + +class FrontendPerfEvents extends Bundle { + val acquire = Bool() + val tlbMiss = Bool() +} + +class FrontendIO(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entries: Int, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], coreInstBits: Int, fetchWidth: Int) extends Bundle { + val might_request = Output(Bool()) + val clock_enabled = Input(Bool()) + val req = Valid(new FrontendReq(vaddrBitsExtended)) + val sfence = Valid(new SFenceReq(vaddrBits, asidBits)) + val resp = Flipped(Decoupled(new FrontendResp(vaddrBits, entries, bhtHistoryLength, bhtCounterLength, vaddrBitsExtended, coreInstBits, fetchWidth))) + val gpa = Flipped(Valid(UInt(vaddrBitsExtended.W))) + val btb_update = Valid(new BTBUpdate(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength)) + val bht_update = Valid(new BHTUpdate(bhtHistoryLength, bhtCounterLength, vaddrBits)) + val ras_update = Valid(new RASUpdate(vaddrBits)) + val 
flush_icache = Output(Bool()) + val npc = Input(UInt(vaddrBitsExtended.W)) + val perf = Input(new FrontendPerfEvents) + val progress = Output(Bool()) +} + +// Non-diplomatic version of Frontend +class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entries: Int, bhtHistoryLength: Option[Int], bhtCounterLength: Option[Int], coreInstBits: Int, nPMPs: Int, vpnBits: Int, paddrBits: Int, pgLevels: Int, xLen: Int, maxPAddrBits: Int, pgIdxBits: Int, hasCorrectable: Boolean, hasUncorrectable: Boolean, fetchWidth: Int) extends Bundle { + val cpu = Flipped(new FrontendIO(vaddrBitsExtended, vaddrBits, asidBits, entries, bhtHistoryLength, bhtCounterLength, coreInstBits, fetchWidth)) + val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits) + val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits) +} diff --git a/rocketv/src/FetchQueue.scala b/rocketv/src/FetchQueue.scala new file mode 100644 index 000000000..c1ad35fc1 --- /dev/null +++ b/rocketv/src/FetchQueue.scala @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu + +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.experimental.hierarchy.instantiable +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util._ + +object FetchQueueParameter { + implicit def rwP: upickle.default.ReadWriter[FetchQueueParameter] = upickle.default.macroRW[FetchQueueParameter] +} + +case class FetchQueueParameter( + useAsyncReset: Boolean, + entries: Int, + vaddrBits: Int, + respEntries: Int, + bhtHistoryLength: Option[Int], + bhtCounterLength: Option[Int], + vaddrBitsExtended: Int, + coreInstBits: Int, + fetchWidth: Int) + extends SerializableModuleParameter { + def gen = new FrontendResp( + vaddrBits, + respEntries, + 
bhtHistoryLength, + bhtCounterLength, + vaddrBitsExtended, + coreInstBits, + fetchWidth + ) +} + +class FetchQueueInterface(parameter: FetchQueueParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val enq = Flipped(Decoupled(parameter.gen)) + val deq = Decoupled(parameter.gen) + val mask = Output(UInt(parameter.entries.W)) +} + +@instantiable +class FetchQueue(val parameter: FetchQueueParameter) + extends FixedIORawModule(new FetchQueueInterface(parameter)) + with SerializableModule[FetchQueueParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + private val valid = RegInit(VecInit(Seq.fill(parameter.entries) { false.B })) + private val elts = Reg(Vec(parameter.entries, parameter.gen)) + + for (i <- 0 until parameter.entries) { + def paddedValid(i: Int) = if (i == -1) true.B else if (i == parameter.entries) false.B else valid(i) + + val flow = true + val wdata = if (i == parameter.entries - 1) io.enq.bits else Mux(valid(i + 1), elts(i + 1), io.enq.bits) + val wen = + Mux( + io.deq.ready, + paddedValid(i + 1) || io.enq.fire && valid(i), + io.enq.fire && paddedValid(i - 1) && !valid(i) + ) + when(wen) { elts(i) := wdata } + + valid(i) := + Mux( + io.deq.ready, + paddedValid(i + 1) || io.enq.fire && ((i == 0 && !flow).B || valid(i)), + io.enq.fire && paddedValid(i - 1) || valid(i) + ) + } + + io.enq.ready := !valid(parameter.entries - 1) + io.deq.valid := valid(0) + io.deq.bits := elts.head + + when(io.enq.valid) { io.deq.valid := true.B } + when(!valid(0)) { io.deq.bits := io.enq.bits } + + io.mask := valid.asUInt +} diff --git a/rocketv/src/Frontend.scala b/rocketv/src/Frontend.scala index 30297c503..36a313d84 100644 --- a/rocketv/src/Frontend.scala +++ b/rocketv/src/Frontend.scala @@ -1,402 +1,620 @@ -// See LICENSE.Berkeley for license details. 
-// See LICENSE.SiFive for license details. - -package freechips.rocketchip.rocket +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ +import chisel3.experimental.hierarchy.{Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util._ -import chisel3.{withClock,withReset} -import chisel3.experimental.SourceInfo -import org.chipsalliance.cde.config._ -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.tile._ -import freechips.rocketchip.tilelink.{TLWidthWidget} -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property - -class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { - val pc = UInt(vaddrBitsExtended.W) - val speculative = Bool() +import chisel3.util.circt.ClockGate +import chisel3.util.experimental.BitSet +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} + +object FrontendParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if(str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[FrontendParameter] = upickle.default.macroRW[FrontendParameter] } -class FrontendExceptions extends Bundle { - val pf = new Bundle { - val inst = Bool() - } - val gf = new Bundle { - val inst = Bool() +case class FrontendParameter( + // must be false, since resetVector will be aligned here. 
+ useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + blockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtParameter: Option[BHTParameter], + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) extends SerializableModuleParameter { + // static now + def hasCorrectable: Boolean = false + def usingHypervisor: Boolean = false + def hasUncorrectable: Boolean = false + def usingAtomicsOnlyForIO: Boolean = false + def itimParameter: Option[AXI4BundleParameter] = None + + // calculate + def bhtHistoryLength: Option[Int] = bhtParameter.map(_.historyLength) + def bhtCounterLength: Option[Int] = bhtParameter.map(_.counterLength) + def usingAtomicsInCache: Boolean = usingAtomics && !usingAtomicsOnlyForIO + private def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) 1 + (if (usingHypervisor) 1 else 0) else 0) + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical addresses + 
(paddrBits + 1).min(xLen) } - val ae = new Bundle { - val inst = Bool() + def entries: Int = btbEntries + def coreInstBits: Int = if (usingCompressed) 16 else 32 + def vpnBits: Int = vaddrBits - pgIdxBits + def maxPAddrBits: Int = xLen match { + case 32 => 34 + case 64 => 56 } + def pgIdxBits: Int = 12 + val fetchWidth: Int = if (usingCompressed) 2 else 1 + def fetchBytes: Int = 4 + val coreInstBytes = (if (usingCompressed) 16 else 32) / 8 + def resetVectorBits: Int = paddrBits + def pmaCheckerParameter: PMACheckerParameter = PMACheckerParameter( + paddrBits = paddrBits, + legal = legal, + cacheable = cacheable, + read = read, + write = write, + putPartial = putPartial, + logic = logic, + arithmetic = arithmetic, + exec = exec, + sideEffects = sideEffects, + ) + val rowBits: Int = blockBytes * 8 + val instructionFetchParameter: AXI4BundleParameter = AXI4BundleParameter( + idWidth = 1, + dataWidth = rowBits, + addrWidth = paddrBits, + userReqWidth = 0, + userDataWidth = 0, + userRespWidth = 0, + hasAW = false, + hasW = false, + hasB = false, + hasAR = true, + hasR = true, + supportId = true, + supportRegion = false, + supportLen = true, + supportSize = true, + supportBurst = true, + supportLock = false, + supportCache = false, + supportQos = false, + supportStrb = false, + supportResp = false, + supportProt = false, + ) + + def icacheParameter: ICacheParameter = ICacheParameter( + useAsyncReset = useAsyncReset, + prefetch = iCachePrefetch, + nSets = iCacheNSets, + nWays = iCacheNWays, + blockBytes = blockBytes, + usingVM = usingVM, + vaddrBits = vaddrBits, + paddrBits = paddrBits + ) + + def tlbParameter: TLBParameter = TLBParameter( + useAsyncReset = useAsyncReset, + xLen = xLen, + nSets = itlbNSets, + nWays = itlbNWays, + nSectors = itlbNSectors, + nSuperpageEntries = itlbNSuperpageEntries, + asidBits = asidBits, + pgLevels = pgLevels, + usingHypervisor = usingHypervisor, + usingAtomics = usingAtomics, + usingDataScratchpad = usingDataScratchpad, + 
usingAtomicsOnlyForIO = usingAtomicsOnlyForIO, + usingVM = usingVM, + usingAtomicsInCache = usingAtomicsInCache, + nPMPs = nPMPs, + pmaCheckerParameter = pmaCheckerParameter, + paddrBits = paddrBits, + isITLB = true, + ) + def btbParameter: Option[BTBParameter] = Option.when(usingBTB)(BTBParameter( + useAsyncReset = useAsyncReset, + fetchBytes = fetchBytes, + vaddrBits = vaddrBits, + entries = btbEntries, + nMatchBits = btbNMatchBits, + nPages = nPages, + nRAS = nRAS, + cacheBlockBytes = blockBytes, + iCacheSet = iCacheNSets, + useCompressed = usingCompressed, + updatesOutOfOrder = btbUpdatesOutOfOrder, + bhtParameter = bhtParameter, + fetchWidth = fetchWidth + )) + + // entry = 5 + def fetchQueueParameter: FetchQueueParameter = FetchQueueParameter( + // static to be false. + useAsyncReset = false, + entries = 5, + vaddrBits = vaddrBits, + respEntries = entries, + bhtHistoryLength = bhtHistoryLength, + bhtCounterLength = bhtCounterLength, + vaddrBitsExtended = vaddrBitsExtended, + coreInstBits = coreInstBits, + fetchWidth = fetchWidth + ) } -class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { - val btb = new BTBResp - val pc = UInt(vaddrBitsExtended.W) // ID stage PC - val data = UInt((fetchWidth * coreInstBits).W) - val mask = Bits(fetchWidth.W) - val xcpt = new FrontendExceptions - val replay = Bool() +class FrontendInterface(parameter: FrontendParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + val nonDiplomatic = new FrontendBundle( + parameter.vaddrBitsExtended, + parameter.vaddrBits, + parameter.asidBits, + parameter.entries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.coreInstBits, + parameter.nPMPs, + parameter.vpnBits, + parameter.paddrBits, + parameter.pgLevels, + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits, + parameter.hasCorrectable, + 
parameter.hasUncorrectable, + parameter.fetchWidth + ) + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) } -class FrontendPerfEvents extends Bundle { - val acquire = Bool() - val tlbMiss = Bool() -} +@instantiable +class Frontend(val parameter: FrontendParameter) + extends FixedIORawModule(new FrontendInterface(parameter)) + with SerializableModule[FrontendParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + + def xLen = parameter.xLen + def fetchWidth = parameter.fetchWidth + def fetchBytes = parameter.fetchBytes + def vaddrBitsExtended = parameter.vaddrBitsExtended + def coreInstBits = parameter.coreInstBits + def vaddrBits = parameter.vaddrBits + def entries = parameter.entries + def coreInstBytes = parameter.coreInstBytes + def usingBTB = parameter.usingBTB + def bhtHistoryLength = parameter.bhtHistoryLength + def bhtCounterLength = parameter.bhtCounterLength + def usingCompressed = parameter.usingCompressed + def clock = io.clock + + object rocketParams { + def clockGate = parameter.clockGate + } -class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { - val might_request = Output(Bool()) - val clock_enabled = Input(Bool()) - val req = Valid(new FrontendReq) - val sfence = Valid(new SFenceReq) - val resp = Flipped(Decoupled(new FrontendResp)) - val gpa = Flipped(Valid(UInt(vaddrBitsExtended.W))) - val btb_update = Valid(new BTBUpdate) - val bht_update = Valid(new BHTUpdate) - val ras_update = Valid(new RASUpdate) - val flush_icache = Output(Bool()) - val npc = Input(UInt(vaddrBitsExtended.W)) - val perf = Input(new FrontendPerfEvents()) - val progress = Output(Bool()) -} + object tileParams { + def btb = 
parameter.btbParameter + } -class Frontend(val icacheParams: ICacheParams, tileId: Int)(implicit p: Parameters) extends LazyModule { - lazy val module = new FrontendModule(this) - val icache = LazyModule(new ICache(icacheParams, tileId)) - val masterNode = icache.masterNode - val slaveNode = icache.slaveNode - val resetVectorSinkNode = BundleBridgeSink[UInt](Some(() => UInt(masterNode.edges.out.head.bundle.addressBits.W))) -} + object Instructions { + def BEQ: BitPat = BitPat("b?????????????????000?????1100011") -class FrontendBundle(val outer: Frontend) extends CoreBundle()(outer.p) { - val cpu = Flipped(new FrontendIO()) - val ptw = new TLBPTWIO() - val errors = new ICacheErrors -} + def JAL = BitPat("b?????????????????????????1101111") + + def JALR = BitPat("b?????????????????000?????1100111") + + def C_BEQZ = BitPat("b????????????????110???????????01") + + def C_BNEZ = BitPat("b????????????????111???????????01") + + def C_J = BitPat("b????????????????101???????????01") -class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) - with HasRocketCoreParameters - with HasL1ICacheParameters { - val io = IO(new FrontendBundle(outer)) - val io_reset_vector = outer.resetVectorSinkNode.bundle - implicit val edge = outer.masterNode.edges.out(0) - val icache = outer.icache.module - require(fetchWidth*coreInstBytes == outer.icacheParams.fetchBytes) - - val fq = withReset(reset.asBool || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) } - - val clock_en_reg = Reg(Bool()) - val clock_en = clock_en_reg || io.cpu.might_request - io.cpu.clock_enabled := clock_en - assert(!(io.cpu.req.valid || io.cpu.sfence.valid || io.cpu.flush_icache || io.cpu.bht_update.valid || io.cpu.btb_update.valid) || io.cpu.might_request) - val gated_clock = + def C_ADD = BitPat("b????????????????1001??????????10") + + def C_MV = BitPat("b????????????????1000??????????10") + } + + object Instructions32 { + def C_JAL = BitPat("b????????????????001???????????01") + } 
+ + val clock_en_reg: Bool = Reg(Bool()) + val clock_en: Bool = clock_en_reg || io.nonDiplomatic.cpu.might_request + val gated_clock: Clock = if (!rocketParams.clockGate) clock - else ClockGate(clock, clock_en, "icache_clock_gate") + else ClockGate(clock, clock_en) - icache.clock := gated_clock + val icache = Instantiate(new ICache(parameter.icacheParameter)) + icache.io.clock := gated_clock + icache.io.reset := io.reset icache.io.clock_enabled := clock_en - withClock (gated_clock) { // entering gated-clock domain - - val tlb = Module(new TLB(true, log2Ceil(fetchBytes), TLBConfig(nTLBSets, nTLBWays, outer.icacheParams.nTLBBasePageSectors, outer.icacheParams.nTLBSuperpages))) - - val s1_valid = Reg(Bool()) - val s2_valid = RegInit(false.B) - val s0_fq_has_space = - !fq.io.mask(fq.io.mask.getWidth-3) || - (!fq.io.mask(fq.io.mask.getWidth-2) && (!s1_valid || !s2_valid)) || - (!fq.io.mask(fq.io.mask.getWidth-1) && (!s1_valid && !s2_valid)) - val s0_valid = io.cpu.req.valid || s0_fq_has_space - s1_valid := s0_valid - val s1_pc = Reg(UInt(vaddrBitsExtended.W)) - val s1_speculative = Reg(Bool()) - val s2_pc = RegInit(t = UInt(vaddrBitsExtended.W), alignPC(io_reset_vector)) - val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B - val s2_btb_resp_bits = Reg(new BTBResp) - val s2_btb_taken = s2_btb_resp_valid && s2_btb_resp_bits.taken - val s2_tlb_resp = Reg(tlb.io.resp.cloneType) - val s2_xcpt = s2_tlb_resp.ae.inst || s2_tlb_resp.pf.inst || s2_tlb_resp.gf.inst - val s2_speculative = RegInit(false.B) - val s2_partial_insn_valid = RegInit(false.B) - val s2_partial_insn = Reg(UInt(coreInstBits.W)) - val wrong_path = RegInit(false.B) - - val s1_base_pc = ~(~s1_pc | (fetchBytes - 1).U) - val ntpc = s1_base_pc + fetchBytes.U - val predicted_npc = WireDefault(ntpc) - val predicted_taken = WireDefault(false.B) - - val s2_replay = Wire(Bool()) - s2_replay := (s2_valid && !fq.io.enq.fire) || RegNext(s2_replay && !s0_valid, true.B) - val npc = Mux(s2_replay, s2_pc, 
predicted_npc) - - s1_pc := io.cpu.npc - // consider RVC fetches across blocks to be non-speculative if the first - // part was non-speculative - val s0_speculative = - if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken - else true.B - s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative)) - - val s2_redirect = WireDefault(io.cpu.req.valid) - s2_valid := false.B - when (!s2_replay) { - s2_valid := !s2_redirect - s2_pc := s1_pc - s2_speculative := s1_speculative - s2_tlb_resp := tlb.io.resp + (icache.io.itimAXI zip io.itimAXI).foreach{ case (frontend, itim) => itim :<>= frontend } + io.instructionFetchAXI :<>= icache.io.instructionFetchAXI + val tlb = Instantiate(new TLB(parameter.tlbParameter)) + tlb.io.clock := gated_clock + tlb.io.reset := io.reset + io.nonDiplomatic.ptw :<>= tlb.io.ptw + io.nonDiplomatic.cpu.clock_enabled := clock_en + val btb = parameter.btbParameter.map(btbParameter => Instantiate(new BTB(btbParameter))) + btb.foreach { btb => + btb.io.clock := io.clock + btb.io.reset := io.reset + btb.io.btb_update := io.nonDiplomatic.cpu.btb_update + btb.io.bht_update := io.nonDiplomatic.cpu.bht_update } - - val recent_progress_counter_init = 3.U - val recent_progress_counter = RegInit(recent_progress_counter_init) - val recent_progress = recent_progress_counter > 0.U - when(io.ptw.req.fire && recent_progress) { recent_progress_counter := recent_progress_counter - 1.U } - when(io.cpu.progress) { recent_progress_counter := recent_progress_counter_init } - - val s2_kill_speculative_tlb_refill = s2_speculative && !recent_progress - - io.ptw <> tlb.io.ptw - tlb.io.req.valid := s1_valid && !s2_replay - tlb.io.req.bits.cmd := M_XRD // Frontend only reads - tlb.io.req.bits.vaddr := s1_pc - tlb.io.req.bits.passthrough := false.B - tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth).U - tlb.io.req.bits.prv := io.ptw.status.prv - tlb.io.req.bits.v := io.ptw.status.v 
- tlb.io.sfence := io.cpu.sfence - tlb.io.kill := !s2_valid || s2_kill_speculative_tlb_refill - - icache.io.req.valid := s0_valid - icache.io.req.bits.addr := io.cpu.npc - icache.io.invalidate := io.cpu.flush_icache - icache.io.s1_paddr := tlb.io.resp.paddr - icache.io.s2_vaddr := s2_pc - icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay - val s2_can_speculatively_refill = s2_tlb_resp.cacheable && !io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill - icache.io.s2_kill := s2_speculative && !s2_can_speculatively_refill || s2_xcpt - icache.io.s2_cacheable := s2_tlb_resp.cacheable - icache.io.s2_prefetch := s2_tlb_resp.prefetchable && !io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableICachePrefetch - - fq.io.enq.valid := RegNext(s1_valid) && s2_valid && (icache.io.resp.valid || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) || (!s2_tlb_resp.miss && icache.io.s2_kill)) - fq.io.enq.bits.pc := s2_pc - io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)) - - fq.io.enq.bits.data := icache.io.resp.bits.data - fq.io.enq.bits.mask := ((1 << fetchWidth)-1).U << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes)) - fq.io.enq.bits.replay := (icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt) || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) - fq.io.enq.bits.btb := s2_btb_resp_bits - fq.io.enq.bits.btb.taken := s2_btb_taken - fq.io.enq.bits.xcpt := s2_tlb_resp - assert(!(s2_speculative && io.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill && !icache.io.s2_kill)) - when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true.B } - - if (usingBTB) { - val btb = Module(new BTB) - btb.io.flush := false.B - btb.io.req.valid := false.B - btb.io.req.bits.addr := s1_pc - btb.io.btb_update := io.cpu.btb_update - btb.io.bht_update := io.cpu.bht_update - btb.io.ras_update.valid := 
false.B - btb.io.ras_update.bits := DontCare - btb.io.bht_advance.valid := false.B - btb.io.bht_advance.bits := DontCare - when (!s2_replay) { - btb.io.req.valid := !s2_redirect - s2_btb_resp_valid := btb.io.resp.valid - s2_btb_resp_bits := btb.io.resp.bits - } - when (btb.io.resp.valid && btb.io.resp.bits.taken) { - predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended) - predicted_taken := true.B + val fq = Instantiate(new FetchQueue(parameter.fetchQueueParameter)) + fq.io.clock := io.clock + fq.io.reset := io.reset.asBool || io.nonDiplomatic.cpu.req.valid + + assert(!(io.nonDiplomatic.cpu.req.valid || io.nonDiplomatic.cpu.sfence.valid || io.nonDiplomatic.cpu.flush_icache || io.nonDiplomatic.cpu.bht_update.valid || io.nonDiplomatic.cpu.btb_update.valid) || io.nonDiplomatic.cpu.might_request) + + withClock(gated_clock) { // entering gated-clock domain + val s1_valid = Reg(Bool()) + val s2_valid = RegInit(false.B) + val s0_fq_has_space = + !fq.io.mask(fq.io.mask.getWidth - 3) || + (!fq.io.mask(fq.io.mask.getWidth - 2) && (!s1_valid || !s2_valid)) || + (!fq.io.mask(fq.io.mask.getWidth - 1) && (!s1_valid && !s2_valid)) + val s0_valid = io.nonDiplomatic.cpu.req.valid || s0_fq_has_space + s1_valid := s0_valid + val s1_pc = Reg(UInt(vaddrBitsExtended.W)) + val s1_speculative = Reg(Bool()) + // TODO: make it Const + def alignPC(pc: UInt): UInt = ~(~pc | (coreInstBytes - 1).U) + val s2_pc = RegInit(UInt(vaddrBitsExtended.W), alignPC(io.resetVector)) + val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B + val s2_btb_resp_bits = Reg(new BTBResp(vaddrBits, entries, fetchWidth, bhtHistoryLength, bhtCounterLength)) + val s2_btb_taken = s2_btb_resp_valid && s2_btb_resp_bits.taken + val s2_tlb_resp = Reg(tlb.io.resp.cloneType) + val s2_xcpt = s2_tlb_resp.ae.inst || s2_tlb_resp.pf.inst || s2_tlb_resp.gf.inst + val s2_speculative = RegInit(false.B) + val s2_partial_insn_valid = RegInit(false.B) + val s2_partial_insn = Reg(UInt(coreInstBits.W)) + val 
wrong_path = RegInit(false.B) + + val s1_base_pc: UInt = ~(~s1_pc | (fetchBytes - 1).U) + val ntpc = s1_base_pc + fetchBytes.U + val predicted_npc = WireDefault(ntpc) + val predicted_taken = WireDefault(false.B) + + val s2_replay = Wire(Bool()) + s2_replay := (s2_valid && !fq.io.enq.fire) || RegNext(s2_replay && !s0_valid, true.B) + val npc = Mux(s2_replay, s2_pc, predicted_npc) + + s1_pc := io.nonDiplomatic.cpu.npc + // consider RVC fetches across blocks to be non-speculative if the first + // part was non-speculative + val s0_speculative = + if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken + else true.B + s1_speculative := Mux( + io.nonDiplomatic.cpu.req.valid, + io.nonDiplomatic.cpu.req.bits.speculative, + Mux(s2_replay, s2_speculative, s0_speculative) + ) + + val s2_redirect = WireDefault(io.nonDiplomatic.cpu.req.valid) + s2_valid := false.B + when(!s2_replay) { + s2_valid := !s2_redirect + s2_pc := s1_pc + s2_speculative := s1_speculative + s2_tlb_resp := tlb.io.resp } - val force_taken = io.ptw.customCSRs.bpmStatic - when (io.ptw.customCSRs.flushBTB) { btb.io.flush := true.B } - when (force_taken) { btb.io.bht_update.valid := false.B } - - val s2_base_pc = ~(~s2_pc | (fetchBytes-1).U) - val taken_idx = Wire(UInt()) - val after_idx = Wire(UInt()) - val useRAS = WireDefault(false.B) - val updateBTB = WireDefault(false.B) - - // If !prevTaken, ras_update / bht_update is always invalid. 
- taken_idx := DontCare - after_idx := DontCare - - def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { - def insnIsRVC(bits: UInt) = bits(1,0) =/= 3.U - val prevRVI = prevValid && !insnIsRVC(prevBits) - val valid = fq.io.enq.bits.mask(idx) && !prevRVI - val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx) - val rvc = insnIsRVC(bits) - val rviBits = Cat(bits, prevBits) - val rviBranch = rviBits(6,0) === Instructions.BEQ.value.U.extract(6,0) - val rviJump = rviBits(6,0) === Instructions.JAL.value.U.extract(6,0) - val rviJALR = rviBits(6,0) === Instructions.JALR.value.U.extract(6,0) - val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15) - val rviCall = (rviJALR || rviJump) && rviBits(7) - val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ - val rvcJAL = (xLen == 32).B && bits === Instructions32.C_JAL - val rvcJump = bits === Instructions.C_J || rvcJAL - val rvcImm = Mux(bits(14), new RVCDecoder(bits, xLen).bImm.asSInt, new RVCDecoder(bits, xLen).jImm.asSInt) - val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0.U - val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7) - val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0.U - val rvcCall = rvcJAL || rvcJALR - val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits)) - val predict_taken = s2_btb_resp_bits.bht.taken || force_taken - val taken = - prevRVI && (rviJump || rviJALR || rviBranch && predict_taken) || - valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && predict_taken) - val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn) - val predictJump = prevRVI && rviJump || valid && rvcJump - val predictBranch = predict_taken && (prevRVI && rviBranch || valid && rvcBranch) - - when (s2_valid && s2_btb_resp_valid && s2_btb_resp_bits.bridx === idx.U && valid && !rvc) { - // The BTB has predicted that the middle of an RVI instruction is - // a 
branch! Flush the BTB and the pipeline. - btb.io.flush := true.B - fq.io.enq.bits.replay := true.B - wrong_path := true.B - ccover(wrong_path, "BTB_NON_CFI_ON_WRONG_PATH", "BTB predicted a non-branch was taken while on the wrong path") + val recent_progress_counter_init = 3.U + val recent_progress_counter = RegInit(recent_progress_counter_init) + val recent_progress = recent_progress_counter > 0.U + when(io.nonDiplomatic.ptw.req.fire && recent_progress) { recent_progress_counter := recent_progress_counter - 1.U } + when(io.nonDiplomatic.cpu.progress) { recent_progress_counter := recent_progress_counter_init } + + val s2_kill_speculative_tlb_refill = s2_speculative && !recent_progress + + tlb.io.req.valid := s1_valid && !s2_replay + def M_XRD = "b00000".U + tlb.io.req.bits.cmd := M_XRD // Frontend only reads + tlb.io.req.bits.vaddr := s1_pc + tlb.io.req.bits.passthrough := false.B + tlb.io.req.bits.size := log2Ceil(coreInstBytes * fetchWidth).U + tlb.io.req.bits.prv := io.nonDiplomatic.ptw.status.prv + tlb.io.req.bits.v := io.nonDiplomatic.ptw.status.v + tlb.io.sfence := io.nonDiplomatic.cpu.sfence + tlb.io.kill := !s2_valid || s2_kill_speculative_tlb_refill + + icache.io.req.valid := s0_valid + icache.io.req.bits.addr := io.nonDiplomatic.cpu.npc + icache.io.invalidate := io.nonDiplomatic.cpu.flush_icache + icache.io.s1_paddr := tlb.io.resp.paddr + icache.io.s2_vaddr := s2_pc + icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay + val s2_can_speculatively_refill = + s2_tlb_resp.cacheable +// && !io.nonDiplomatic.ptw.customCSRs.asInstanceOf[RocketCustomCSRs].disableSpeculativeICacheRefill + icache.io.s2_kill := s2_speculative && !s2_can_speculatively_refill || s2_xcpt + icache.io.s2_cacheable := s2_tlb_resp.cacheable + icache.io.s2_prefetch := s2_tlb_resp.prefetchable +// && !io.ptw.customCSRs +// .asInstanceOf[RocketCustomCSRs] +// .disableICachePrefetch + + fq.io.enq.valid := RegNext( + s1_valid + ) && s2_valid && (icache.io.resp.valid || 
(s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) || (!s2_tlb_resp.miss && icache.io.s2_kill)) + fq.io.enq.bits.pc := s2_pc + io.nonDiplomatic.cpu.npc := alignPC(Mux(io.nonDiplomatic.cpu.req.valid, io.nonDiplomatic.cpu.req.bits.pc, npc)) + + fq.io.enq.bits.data := icache.io.resp.bits.data + fq.io.enq.bits.mask := ((1 << fetchWidth) - 1).U << (if(log2Ceil(fetchWidth) == 0) 0.U else s2_pc(log2Ceil(fetchWidth) + log2Ceil(coreInstBytes) - 1, log2Ceil(coreInstBytes))) + fq.io.enq.bits.replay := (icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt) || (s2_kill_speculative_tlb_refill && s2_tlb_resp.miss) + fq.io.enq.bits.btb := s2_btb_resp_bits + fq.io.enq.bits.btb.taken := s2_btb_taken + fq.io.enq.bits.xcpt.ae := s2_tlb_resp.ae.inst + fq.io.enq.bits.xcpt.gf := s2_tlb_resp.gf.inst + fq.io.enq.bits.xcpt.pf := s2_tlb_resp.pf.inst +// assert( +// !(s2_speculative && io.ptw.customCSRs +// .asInstanceOf[RocketCustomCSRs] +// .disableSpeculativeICacheRefill && !icache.io.s2_kill) +// ) + when(icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae := true.B } + + btb.map { btb => + btb.io.flush := false.B + btb.io.req.valid := false.B + btb.io.req.bits.addr := s1_pc + btb.io.ras_update.valid := false.B + btb.io.ras_update.bits := DontCare + btb.io.bht_advance.valid := false.B + btb.io.bht_advance.bits := DontCare + when(!s2_replay) { + btb.io.req.valid := !s2_redirect + s2_btb_resp_valid := btb.io.resp.valid + s2_btb_resp_bits := btb.io.resp.bits + } + when(btb.io.resp.valid && btb.io.resp.bits.taken) { + def sextTo(x: UInt, n: Int): UInt = { + require(x.getWidth <= n) + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth - 1)), x) + } + + predicted_npc := sextTo(btb.io.resp.bits.target, vaddrBitsExtended) + predicted_taken := true.B } - when (!prevTaken) { - taken_idx := idx.U - after_idx := (idx + 1).U - btb.io.ras_update.valid := fq.io.enq.fire && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid 
&& (rvcCall || rvcReturn)) - btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, - Mux(Mux(prevRVI, rviCall, rvcCall), CFIType.call, - Mux(Mux(prevRVI, rviBranch, rvcBranch) && !force_taken, CFIType.branch, - CFIType.jump))) - - when (!s2_btb_taken) { - when (fq.io.enq.fire && taken && !predictBranch && !predictJump && !predictReturn) { - wrong_path := true.B +// val force_taken = io.nonDiplomatic.ptw.customCSRs.bpmStatic +// when(io.nonDiplomatic.ptw.customCSRs.flushBTB) { btb.io.flush := true.B } +// when(force_taken) { btb.io.bht_update.valid := false.B } + + val s2_base_pc: UInt = ~(~s2_pc | (fetchBytes - 1).U) + val taken_idx = Wire(UInt()) + val after_idx = Wire(UInt()) + val useRAS = WireDefault(false.B) + val updateBTB = WireDefault(false.B) + + // If !prevTaken, ras_update / bht_update is always invalid. + taken_idx := DontCare + after_idx := DontCare + + def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { + def insnIsRVC(bits: UInt) = bits(1, 0) =/= 3.U + val prevRVI = prevValid && !insnIsRVC(prevBits) + val valid = fq.io.enq.bits.mask(idx) && !prevRVI + val bits = if (coreInstBits * (idx + 1) == coreInstBits * idx) 0.U else + fq.io.enq.bits.data(coreInstBits * (idx + 1) - 1, coreInstBits * idx) + val rvc = insnIsRVC(bits) + val rviBits = Cat(bits, prevBits) + val rviBranch = rviBits(6, 0) === Instructions.BEQ.value.U(6, 0) + val rviJump = rviBits(6, 0) === Instructions.JAL.value.U(6, 0) + val rviJALR = rviBits(6, 0) === Instructions.JALR.value.U(6, 0) + val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19, 15) + val rviCall = (rviJALR || rviJump) && rviBits(7) + val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ + val rvcJAL = (xLen == 32).B && bits === Instructions32.C_JAL + val rvcJump = bits === Instructions.C_J || rvcJAL + val rvcImm = Mux(bits(14), new RVCDecoder(bits, xLen).bImm.asSInt, new RVCDecoder(bits, xLen).jImm.asSInt) + val 
rvcJR = bits === Instructions.C_MV && bits(6, 2) === 0.U + val rvcReturn = rvcJR && BitPat("b00?01") === bits(11, 7) + val rvcJALR = bits === Instructions.C_ADD && bits(6, 2) === 0.U + val rvcCall = rvcJAL || rvcJALR + val rviImm = Mux(rviBits(3), ImmGen(ImmGen.IMM_UJ, rviBits), ImmGen(ImmGen.IMM_SB, rviBits)) + val predict_taken = BHTResp.taken(s2_btb_resp_bits.bht) /*|| force_taken*/ + val taken = + prevRVI && (rviJump || rviJALR || rviBranch && predict_taken) || + valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && predict_taken) + val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn) + val predictJump = prevRVI && rviJump || valid && rvcJump + val predictBranch = predict_taken && (prevRVI && rviBranch || valid && rvcBranch) + + when(s2_valid && s2_btb_resp_valid && s2_btb_resp_bits.bridx === idx.U && valid && !rvc) { + // The BTB has predicted that the middle of an RVI instruction is + // a branch! Flush the BTB and the pipeline. + btb.io.flush := true.B + fq.io.enq.bits.replay := true.B + wrong_path := true.B + } + + when(!prevTaken) { + taken_idx := idx.U + after_idx := (idx + 1).U + btb.io.ras_update.valid := fq.io.enq.fire && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn)) + btb.io.ras_update.bits.cfiType := Mux( + Mux(prevRVI, rviReturn, rvcReturn), + CFIType.ret, + Mux( + Mux(prevRVI, rviCall, rvcCall), + CFIType.call, + Mux(Mux(prevRVI, rviBranch, rvcBranch) /* && !force_taken */, CFIType.branch, CFIType.jump) + ) + ) + + when(!s2_btb_taken) { + when(fq.io.enq.fire && taken && !predictBranch && !predictJump && !predictReturn) { + wrong_path := true.B + } + when(s2_valid && predictReturn) { + useRAS := true.B + } + when(s2_valid && (predictBranch || predictJump)) { + val pc: UInt = s2_base_pc | (idx * coreInstBytes).U + val npc = + if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) + else Mux(prevRVI, pc - coreInstBytes.U, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) + 
predicted_npc := npc.asUInt + } } - when (s2_valid && predictReturn) { - useRAS := true.B + when(prevRVI && rviBranch || valid && rvcBranch) { + btb.io.bht_advance.valid := fq.io.enq.fire && !wrong_path + btb.io.bht_advance.bits := s2_btb_resp_bits } - when (s2_valid && (predictBranch || predictJump)) { - val pc = s2_base_pc | (idx*coreInstBytes).U - val npc = - if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) - else Mux(prevRVI, pc - coreInstBytes.U, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) - predicted_npc := npc.asUInt + when( + !s2_btb_resp_valid && (predictBranch && BHTResp.strongly_taken( + s2_btb_resp_bits.bht + ) || predictJump || predictReturn) + ) { + updateBTB := true.B } } - when (prevRVI && rviBranch || valid && rvcBranch) { - btb.io.bht_advance.valid := fq.io.enq.fire && !wrong_path - btb.io.bht_advance.bits := s2_btb_resp_bits - } - when (!s2_btb_resp_valid && (predictBranch && s2_btb_resp_bits.bht.strongly_taken || predictJump || predictReturn)) { - updateBTB := true.B - } - } - if (idx == fetchWidth-1) { - when (fq.io.enq.fire) { - s2_partial_insn_valid := false.B - when (valid && !prevTaken && !rvc) { - s2_partial_insn_valid := true.B - s2_partial_insn := bits | 0x3.U + if (idx == fetchWidth - 1) { + when(fq.io.enq.fire) { + s2_partial_insn_valid := false.B + when(valid && !prevTaken && !rvc) { + s2_partial_insn_valid := true.B + s2_partial_insn := bits | 0x3.U + } } + prevTaken || taken + } else { + scanInsns(idx + 1, valid, bits, prevTaken || taken) } - prevTaken || taken - } else { - scanInsns(idx + 1, valid, bits, prevTaken || taken) } - } - when (!io.cpu.btb_update.valid) { - val fetch_bubble_likely = !fq.io.mask(1) - btb.io.btb_update.valid := fq.io.enq.fire && !wrong_path && fetch_bubble_likely && updateBTB - btb.io.btb_update.bits.prediction.entry := tileParams.btb.get.nEntries.U - btb.io.btb_update.bits.isValid := true.B - btb.io.btb_update.bits.cfiType := btb.io.ras_update.bits.cfiType - btb.io.btb_update.bits.br_pc := 
s2_base_pc | (taken_idx << log2Ceil(coreInstBytes)) - btb.io.btb_update.bits.pc := s2_base_pc - } + when(!io.nonDiplomatic.cpu.btb_update.valid) { + val fetch_bubble_likely = !fq.io.mask(1) + btb.io.btb_update.valid := fq.io.enq.fire && !wrong_path && fetch_bubble_likely && updateBTB + btb.io.btb_update.bits.prediction.entry := tileParams.btb.get.nEntries.U + btb.io.btb_update.bits.isValid := true.B + btb.io.btb_update.bits.cfiType := btb.io.ras_update.bits.cfiType + btb.io.btb_update.bits.br_pc := s2_base_pc | (taken_idx << log2Ceil(coreInstBytes)) + btb.io.btb_update.bits.pc := s2_base_pc + } - btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) + btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) - val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) - when (useRAS) { - predicted_npc := btb.io.ras_head.bits - } - when (fq.io.enq.fire && (s2_btb_taken || taken)) { - s2_partial_insn_valid := false.B - } - when (!s2_btb_taken) { - when (taken) { - fq.io.enq.bits.btb.bridx := taken_idx - fq.io.enq.bits.btb.taken := true.B - fq.io.enq.bits.btb.entry := tileParams.btb.get.nEntries.U - when (fq.io.enq.fire) { s2_redirect := true.B } + val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) + when(useRAS) { + predicted_npc := btb.io.ras_head.bits + } + when(fq.io.enq.fire && (s2_btb_taken || taken)) { + s2_partial_insn_valid := false.B + } + when(!s2_btb_taken) { + when(taken) { + fq.io.enq.bits.btb.bridx := taken_idx + fq.io.enq.bits.btb.taken := true.B + fq.io.enq.bits.btb.entry := tileParams.btb.get.nEntries.U + when(fq.io.enq.fire) { s2_redirect := true.B } + } } - } - assert(!s2_partial_insn_valid || fq.io.enq.bits.mask(0)) - when (s2_redirect) { s2_partial_insn_valid := false.B } - when (io.cpu.req.valid) { wrong_path := false.B } - } + assert(!s2_partial_insn_valid || fq.io.enq.bits.mask(0)) + when(s2_redirect) { s2_partial_insn_valid := false.B } + 
when(io.nonDiplomatic.cpu.req.valid) { wrong_path := false.B } + } - io.cpu.resp <> fq.io.deq + io.nonDiplomatic.cpu.resp <> fq.io.deq - // supply guest physical address to commit stage - val gpa_valid = Reg(Bool()) - val gpa = Reg(UInt(vaddrBitsExtended.W)) - when (fq.io.enq.fire && s2_tlb_resp.gf.inst) { - when (!gpa_valid) { - gpa := s2_tlb_resp.gpa + // supply guest physical address to commit stage + val gpa_valid = Reg(Bool()) + val gpa = Reg(UInt(vaddrBitsExtended.W)) + when(fq.io.enq.fire && s2_tlb_resp.gf.inst) { + when(!gpa_valid) { + gpa := s2_tlb_resp.gpa + } + gpa_valid := true.B } - gpa_valid := true.B - } - when (io.cpu.req.valid) { - gpa_valid := false.B - } - io.cpu.gpa.valid := gpa_valid - io.cpu.gpa.bits := gpa + when(io.nonDiplomatic.cpu.req.valid) { + gpa_valid := false.B + } + io.nonDiplomatic.cpu.gpa.valid := gpa_valid + io.nonDiplomatic.cpu.gpa.bits := gpa - // performance events - io.cpu.perf.acquire := icache.io.perf.acquire - io.cpu.perf.tlbMiss := io.ptw.req.fire - io.errors := icache.io.errors + // performance events + io.nonDiplomatic.cpu.perf.acquire := icache.io.perf.acquire + io.nonDiplomatic.cpu.perf.tlbMiss := io.nonDiplomatic.ptw.req.fire + io.nonDiplomatic.errors := icache.io.errors - // gate the clock - clock_en_reg := !rocketParams.clockGate.B || - io.cpu.might_request || // chicken bit + // gate the clock + clock_en_reg := !rocketParams.clockGate.B || + io.nonDiplomatic.cpu.might_request || // chicken bit icache.io.keep_clock_enabled || // I$ miss or ITIM access s1_valid || s2_valid || // some fetch in flight !tlb.io.req.ready || // handling TLB miss - !fq.io.mask(fq.io.mask.getWidth-1) // queue not full + !fq.io.mask(fq.io.mask.getWidth - 1) // queue not full } // leaving gated-clock domain - - def alignPC(pc: UInt) = ~(~pc | (coreInstBytes - 1).U) - - def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"FRONTEND_$label", "Rocket;;" + desc) -} - -/** Mix-ins for 
constructing tiles that have an ICache-based pipeline frontend */ -trait HasICacheFrontend extends CanHavePTW { this: BaseTile => - val module: HasICacheFrontendModule - val frontend = LazyModule(new Frontend(tileParams.icache.get, tileId)) - tlMasterXbar.node := TLWidthWidget(tileParams.icache.get.rowBits/8) := frontend.masterNode - connectTLSlave(frontend.slaveNode, tileParams.core.fetchBytes) - frontend.icache.hartIdSinkNodeOpt.foreach { _ := hartIdNexusNode } - frontend.icache.mmioAddressPrefixSinkNodeOpt.foreach { _ := mmioAddressPrefixNexusNode } - frontend.resetVectorSinkNode := resetVectorNexusNode - nPTWPorts += 1 - - // This should be a None in the case of not having an ITIM address, when we - // don't actually use the device that is instantiated in the frontend. - private val deviceOpt = if (tileParams.icache.get.itimAddr.isDefined) Some(frontend.icache.device) else None -} - -trait HasICacheFrontendModule extends CanHavePTWModule { - val outer: HasICacheFrontend - ptwPorts += outer.frontend.module.io.ptw } diff --git a/rocketv/src/ImmGen.scala b/rocketv/src/ImmGen.scala new file mode 100644 index 000000000..d78ab20ac --- /dev/null +++ b/rocketv/src/ImmGen.scala @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv + +import chisel3._ +import chisel3.util.Cat + +object ImmGen { + def IMM_S = 0.U(3.W) + def IMM_SB = 1.U(3.W) + def IMM_U = 2.U(3.W) + def IMM_UJ = 3.U(3.W) + def IMM_I = 4.U(3.W) + def IMM_Z = 5.U(3.W) + + def apply(sel: UInt, inst: UInt) = { + val sign = Mux(sel === IMM_Z, 0.S, inst(31).asSInt) + val b30_20 = Mux(sel === IMM_U, inst(30, 20).asSInt, sign) + val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19, 12).asSInt) + val b11 = Mux( + sel === IMM_U || sel === IMM_Z, + 0.S, + Mux(sel === IMM_UJ, inst(20).asSInt, 
Mux(sel === IMM_SB, inst(7).asSInt, sign)) + ) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, 0.U, inst(30, 25)) + val b4_1 = Mux( + sel === IMM_U, + 0.U, + Mux(sel === IMM_S || sel === IMM_SB, inst(11, 8), Mux(sel === IMM_Z, inst(19, 16), inst(24, 21))) + ) + val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Mux(sel === IMM_Z, inst(15), 0.U))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt + } +} From 7e0809b6f9eea1ffbf1e348ae0b24dbb3556e7c4 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 2 Jul 2024 19:52:08 +0800 Subject: [PATCH 075/140] [rocketv] add elaborator for Frontend - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Frontend config --useAsyncReset false --clockGate true --xLen 32 --usingAtomics true --usingDataScratchpad false --usingVM false --usingCompressed true --usingBTB false --itlbNSets 1 --itlbNWays 32 --itlbNSectors 4 --itlbNSuperpageEntries 4 --blockBytes 32 --iCacheNSets 64 --iCacheNWays 4 --iCachePrefetch false --btbEntries 28 --btbNMatchBits 14 --btbUpdatesOutOfOrder false --nPages 6 --nRAS 6 --nPMPs 8 --paddrBits 32 --pgLevels 2 --asidBits 0 --bhtNEntries 512 --bhtCounterLength 1 --bhtHistoryLength 8 --bhtHistoryBits 3 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --exec 80000000-ffffffff --sideEffects 00000000-3fffffff - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Frontend design --parameter ./Frontend.json --run-firtool --- elaborator/src/rocketv/Frontend.scala | 138 ++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 elaborator/src/rocketv/Frontend.scala diff --git a/elaborator/src/rocketv/Frontend.scala b/elaborator/src/rocketv/Frontend.scala new file mode 100644 index 000000000..c36c94993 --- /dev/null +++ b/elaborator/src/rocketv/Frontend.scala @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, Frontend, FrontendParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Frontend extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class FrontendParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "xLen") xLen: Int, + @arg(name = "usingAtomics") usingAtomics: Boolean, + @arg(name = "usingDataScratchpad") usingDataScratchpad: Boolean, + @arg(name = "usingVM") usingVM: Boolean, + @arg(name = "usingCompressed") usingCompressed: Boolean, + @arg(name = "usingBTB") usingBTB: Boolean, + @arg(name = "itlbNSets") itlbNSets: Int, + @arg(name = "itlbNWays") itlbNWays: Int, + @arg(name = "itlbNSectors") itlbNSectors: Int, + @arg(name = "itlbNSuperpageEntries") itlbNSuperpageEntries: Int, + @arg(name = "blockBytes") blockBytes: Int, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean, + @arg(name = "btbEntries") btbEntries: Int, + @arg(name = "btbNMatchBits") btbNMatchBits: Int, + @arg(name = 
"btbUpdatesOutOfOrder") btbUpdatesOutOfOrder: Boolean, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "pgLevels") pgLevels: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "bhtNEntries") bhtNEntries: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtHistoryBits") bhtHistoryBits: Option[Int], + @arg(name = "legal") legal: Seq[BitSet], + @arg(name = "cacheable") cacheable: Seq[BitSet], + @arg(name = "read") read: Seq[BitSet], + @arg(name = "write") write: Seq[BitSet], + @arg(name = "putPartial") putPartial: Seq[BitSet], + @arg(name = "logic") logic: Seq[BitSet], + @arg(name = "arithmetic") arithmetic: Seq[BitSet], + @arg(name = "exec") exec: Seq[BitSet], + @arg(name = "sideEffects") sideEffects: Seq[BitSet]) { + def convert: FrontendParameter = FrontendParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + blockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtNEntries + .lazyZip(bhtCounterLength) + .lazyZip(bhtHistoryLength) + .lazyZip(bhtHistoryBits) + .map { + case (bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) => + BHTParameter(bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) + } + .headOption, + legal.foldLeft(BitSet.empty)(_.union(_)), + cacheable.foldLeft(BitSet.empty)(_.union(_)), + read.foldLeft(BitSet.empty)(_.union(_)), + write.foldLeft(BitSet.empty)(_.union(_)), 
+ putPartial.foldLeft(BitSet.empty)(_.union(_)), + logic.foldLeft(BitSet.empty)(_.union(_)), + arithmetic.foldLeft(BitSet.empty)(_.union(_)), + exec.foldLeft(BitSet.empty)(_.union(_)), + sideEffects.foldLeft(BitSet.empty)(_.union(_)) + ) + } + + implicit def FrontendParameterMainParser: ParserForClass[FrontendParameterMain] = + ParserForClass[FrontendParameterMain] + + @main + def config(@arg(name = "parameter") parameter: FrontendParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Frontend, FrontendParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 1e257bae6530809149fffbbc3c868fae1788f7fe Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 2 Jul 2024 23:55:51 +0800 Subject: [PATCH 076/140] [rocketv] copy RocketCore into rocketv project --- rocketv/src/RocketCore.scala | 1318 ++++++++++++++++++++++++++++++++++ 1 file changed, 1318 insertions(+) create mode 100644 rocketv/src/RocketCore.scala diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala new file mode 100644 index 000000000..b1aa9294d --- /dev/null +++ b/rocketv/src/RocketCore.scala @@ -0,0 +1,1318 @@ +// See chipsalliance:rocket-chip LICENSE.Berkeley for license details. +// See chipsalliance:rocket-chip LICENSE.SiFive for license details. + +package org.chipsalliance.t1.rocketcore + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.decode.DecodeBundle +import freechips.rocketchip.tile.TileInterrupts +import freechips.rocketchip.util._ +import org.chipsalliance.cde.config.{Field, Parameters} +import org.chipsalliance.t1.rockettile.{VectorRequest, VectorResponse} + +import scala.collection.mutable.ArrayBuffer + +// TODO: remove it. 
+import freechips.rocketchip.rocket.{Causes, MulDivParams, RocketCoreParams} +import freechips.rocketchip.tile.{FPUCoreIO, HasCoreParameters} + +trait HasRocketCoreParameters extends HasCoreParameters { + lazy val rocketParams: RocketCoreParams = tileParams.core.asInstanceOf[RocketCoreParams] + + val fastLoadWord = rocketParams.fastLoadWord + val fastLoadByte = rocketParams.fastLoadByte + + val mulDivParams = rocketParams.mulDiv.getOrElse(MulDivParams()) // TODO ask andrew about this + + val aluFn = new ALUFN + + require(!fastLoadByte || fastLoadWord) + require(!rocketParams.haveFSDirty, "rocket doesn't support setting fs dirty from outside, please disable haveFSDirty") + require(!usingConditionalZero, "Zicond is not yet implemented in ABLU") +} +class CoreInterrupts(val hasBeu: Boolean)(implicit p: Parameters) extends TileInterrupts()(p) { + val buserror = Option.when(hasBeu)(Bool()) +} + +class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters) + extends Module + with HasRocketCoreParameters { + // Checker + require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth) + require(!(coreParams.useRVE && coreParams.fpu.nonEmpty), "Can't select both RVE and floating-point") + require(!(coreParams.useRVE && coreParams.useHypervisor), "Can't select both RVE and Hypervisor") + + // Parameters + val pipelinedMul: Boolean = usingMulDiv && mulDivParams.mulUnroll == xLen + val decoder: InstructionDecoder = new org.chipsalliance.t1.rocketcore.InstructionDecoder( + org.chipsalliance.t1.rocketcore.InstructionDecoderParameter( + (org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) ++ + org.chipsalliance.t1.rocketcore.CustomInstructions.rocketSet).filter { i => + i.instructionSets.map(_.name) match { + // I + case s if s.contains("rv_i") => true + case s if s.contains("rv32_i") => xLen == 32 + case s if s.contains("rv64_i") => xLen == 64 + // M + case s if s.contains("rv_m") => 
usingMulDiv + case s if s.contains("rv64_m") => (xLen == 64) && usingMulDiv + // A + case s if s.contains("rv_a") => usingAtomics + case s if s.contains("rv64_a") => (xLen == 64) && usingAtomics + // ZICSR + case s if s.contains("rv_zicsr") => true + // ZIFENCEI + case s if s.contains("rv_zifencei") => true + // F + case s if s.contains("rv_f") => !(fLen == 0) + case s if s.contains("rv64_f") => (xLen == 64) && !(fLen == 0) + // D + case s if s.contains("rv_d") => fLen == 64 + case s if s.contains("rv64_d") => (xLen == 64) && (fLen == 64) + // ZFH + case s if s.contains("rv_zfh") => minFLen == 16 + case s if s.contains("rv64_zfh") => (xLen == 64) && (minFLen == 16) + case s if s.contains("rv_d_zfh") => (fLen == 64) && (minFLen == 16) + + // Priv + case s if s.contains("rv_system") => true + // Supervisor + case s if s.contains("rv_s") => + i.name match { + // if support superviosr but don't support virtual memory, raise illinstr. + case s if s.contains("sfence.vma") => usingVM + case s if s.contains("sret") => usingSupervisor + } + case s if s.contains("rv_smrnmi") => usingNMI + // Hypervisor + case s if s.contains("rv_h") => usingHypervisor + case s if s.contains("rv64_h") => (xLen == 64) && usingHypervisor + // Debug + case s if s.contains("rv_sdext") => usingDebug + + // T1 Vector + case s if s.contains("rv_v") => usingVector + // unratified but supported. 
+ case s if s.contains("rv_zicond") => usingConditionalZero + // custom + case s if s.contains("rv_rocket") => + i.name match { + case "c.flush.d.l1" => coreParams.haveCFlush + case "c.discard.d.l1" => coreParams.haveCFlush + case "cease" => rocketParams.haveCease + } + case _ => false + } + }.filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + }.toSeq.distinct, + pipelinedMul, + flushOnFenceI + ) + ) + val lgNXRegs: Int = if (coreParams.useRVE) 4 else 5 + val regAddrMask: Int = (1 << lgNXRegs) - 1 + + val hartid = IO(Input(UInt(hartIdLen.W))) + val interrupts = IO(Input(new CoreInterrupts(hasBeu))) + val imem = IO(new FrontendIO) + val dmem = IO(new HellaCacheIO) + val ptw = IO(Flipped(new DatapathPTWIO())) + val fpu = Option.when(usingFPU)(IO(Flipped(new FPUCoreIO()))) + val bpwatch = IO(Output(Vec(coreParams.nBreakpoints, new BPWatch(coreParams.retireWidth)))) + val cease = IO(Output(Bool())) + val wfi = IO(Output(Bool())) + val traceStall = IO(Input(Bool())) + val t1Request = Option.when(usingVector)(IO(Valid(new VectorRequest(xLen)))) + val t1Response = Option.when(usingVector)(IO(Flipped(Decoupled(new VectorResponse(xLen))))) + // logic for T1 + val t1IssueQueueRelease = Option.when(usingVector)(IO(Input(Bool()))) + + // Signal outside from internal clock domain. 
+ + val longLatencyStall = Reg(Bool()) + val idRegPause = Reg(Bool()) + val imemMightRequestReg = Reg(Bool()) + val clockEnable = WireDefault(true.B) + val clockEnableReg = RegInit(true.B) + val gatedClock = + Option.when(rocketParams.clockGate)(ClockGate(clock, clockEnable, "rocket_clock_gate")).getOrElse(clock) + // leaving gated-clock domain + val gatedDomain = withClock(gatedClock)(new Gated) + + class Gated { + // performance counters + def pipelineIDToWB[T <: Data](x: T): T = RegEnable(RegEnable(RegEnable(x, !ctrlKilled), exPcValid), memPcValid) + // TODO: remove it and probe signal to verification modules + // format: off + val perfEvents: EventSets = new EventSets( + Seq( + new EventSet( + (mask, hits) => Mux(wbException, mask(0), wbValid && pipelineIDToWB((mask & hits).orR)), + Seq( + ("exception", () => false.B), + // TODO: why no FPU here? + ("load", () => idDecodeOutput(decoder.mem) && idDecodeOutput(decoder.memCommand) === M_XRD && !Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B)), + ("store", () => idDecodeOutput(decoder.mem) && idDecodeOutput(decoder.memCommand) === M_XWR && !Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B)), + ("system", () => idDecodeOutput(decoder.csr) =/= CSR.N), + ("arith", () => idDecodeOutput(decoder.wxd) && !( idDecodeOutput(decoder.isJal) || idDecodeOutput(decoder.isJalr) || idDecodeOutput(decoder.mem) || Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B) || Option.when(usingMulDiv && pipelinedMul)(idDecodeOutput(decoder.mul)).getOrElse(false.B) || Option.when(usingMulDiv)(idDecodeOutput(decoder.div)).getOrElse(false.B) || idDecodeOutput(decoder.csr) =/= CSR.N )), + ("branch", () => idDecodeOutput(decoder.isBranch)), + ("jal", () => idDecodeOutput(decoder.isJal)), + ("jalr", () => idDecodeOutput(decoder.isJalr)) + ) ++ + Option.when(usingAtomics)(Seq( + ("amo", () => idDecodeOutput(decoder.mem) && (isAMO(idDecodeOutput(decoder.memCommand)) || 
idDecodeOutput(decoder.memCommand).isOneOf(M_XLR, M_XSC))) + )).getOrElse(Seq()) ++ + Option.when(usingMulDiv)(Seq( + ("mul", () => if (pipelinedMul) idDecodeOutput(decoder.mul) else idDecodeOutput(decoder.div) && (idDecodeOutput(decoder.aluFn) & aluFn.FN_DIV) =/= aluFn.FN_DIV), + ("div", () => if (pipelinedMul) idDecodeOutput(decoder.div) else idDecodeOutput(decoder.div) && (idDecodeOutput(decoder.aluFn) & aluFn.FN_DIV) === aluFn.FN_DIV) + )).getOrElse(Seq()) ++ + fpu.map(fpu => Seq( + ("fp load", () => idDecodeOutput(decoder.fp) && fpu.dec.ldst && fpu.dec.wen), + ("fp store", () => idDecodeOutput(decoder.fp) && fpu.dec.ldst && !fpu.dec.wen), + ("fp add", () => idDecodeOutput(decoder.fp) && fpu.dec.fma && fpu.dec.swap23), + ("fp mul", () => idDecodeOutput(decoder.fp) && fpu.dec.fma && !fpu.dec.swap23 && !fpu.dec.ren3), + ("fp mul-add", () => idDecodeOutput(decoder.fp) && fpu.dec.fma && fpu.dec.ren3), + ("fp div/sqrt", () => idDecodeOutput(decoder.fp) && (fpu.dec.div || fpu.dec.sqrt)), + ("fp other", () => idDecodeOutput(decoder.fp) && !(fpu.dec.ldst || fpu.dec.fma || fpu.dec.div || fpu.dec.sqrt )) + )).getOrElse(Seq()) + ), + new EventSet( + (mask, hits) => (mask & hits).orR, + Seq( + ("load-use interlock", () => idExHazard && exRegDecodeOutput(decoder.mem) || idMemHazard && memRegDecodeOutput(decoder.mem) || idWbHazard && wbRegDecodeOutput(decoder.mem) ), + ("long-latency interlock", () => idScoreboardHazard), + ("csr interlock", () => idExHazard && exRegDecodeOutput(decoder.csr) =/= CSR.N || idMemHazard && memRegDecodeOutput(decoder.csr) =/= CSR.N || idWbHazard && wbRegDecodeOutput(decoder.csr) =/= CSR.N), + ("I$ blocked", () => icacheBlocked), + ("D$ blocked", () => idDecodeOutput(decoder.mem) && dcacheBlocked), + ("branch misprediction", () => takePcMem && memDirectionMisprediction), + ("control-flow target misprediction", () => takePcMem && memMisprediction && memCfi && !memDirectionMisprediction && !icacheBlocked), + ("flush", () => wbRegFlushPipe), + 
("replay", () => replayWb) + ) ++ + Option.when(usingMulDiv)(Seq( + ("mul/div interlock", () => idExHazard && (Option.when(pipelinedMul)(exRegDecodeOutput(decoder.mul)).getOrElse(false.B) || exRegDecodeOutput(decoder.div)) || idMemHazard && (Option.when(pipelinedMul)(memRegDecodeOutput(decoder.mul)).getOrElse(false.B) || memRegDecodeOutput(decoder.div)) || idWbHazard && wbRegDecodeOutput(decoder.div)) + )).getOrElse(Seq()) ++ + Option.when(usingFPU)(Seq( + ("fp interlock", () => idExHazard && exRegDecodeOutput(decoder.fp) || idMemHazard && memRegDecodeOutput(decoder.fp) || idWbHazard && wbRegDecodeOutput(decoder.fp) || idDecodeOutput(decoder.fp) && idStallFpu) + )).getOrElse(Seq()) + ), + new EventSet( + (mask, hits) => (mask & hits).orR, + Seq( + ("I$ miss", () => imem.perf.acquire), + ("D$ miss", () => dmem.perf.acquire), + ("D$ release", () =>dmem.perf.release), + ("ITLB miss", () => imem.perf.tlbMiss), + ("DTLB miss", () => dmem.perf.tlbMiss), + ("L2 TLB miss", () => ptw.perf.l2miss) + ) + ) + ) + ) + // format: on + + // Start RTL Here + // instantiate modules + // TODO: remove implicit parameter for them. + + val csr: CSRFile = Module(new CSRFile(perfEvents, coreParams.customCSRs.decls, hasBeu)) + + // TODO: move to Parameter Level or LazyModule level. + /** Decoder instantiated, input from IF, output to ID. 
*/ + val decoderModule = Module(new RawModule { + override def desiredName: String = "RocketDecoder" + val instruction = IO(Input(UInt(32.W))) + val output = IO(Output(decoder.table.bundle)) + output := decoder.table.decode(instruction) + }) + val instructionBuffer: IBuf = Module(new IBuf) + val breakpointUnit: BreakpointUnit = Module(new BreakpointUnit(nBreakpoints)) + val arithmeticLogicUnit: ALU = Module(new ALU()) + val muldiv = Module( + new MulDiv(if (pipelinedMul) mulDivParams.copy(mulUnroll = 0) else mulDivParams, width = xLen, aluFn = aluFn) + ).suggestName(if (pipelinedMul) "div" else "muldiv") + val mul = pipelinedMul.option(Module(new PipelinedMultiplier(xLen, 2, aluFn = aluFn))) + // RF is not a Module. + val rf = new RegFile(regAddrMask, xLen) + + // wire definations. + + val idDecodeOutput: DecodeBundle = Wire(decoder.table.bundle) + + val exRegExceptionInterrupt: Bool = Reg(Bool()) + val exRegException: Bool = Reg(Bool()) + val exRegValid: Bool = Reg(Bool()) + val exRegRVC: Bool = Reg(Bool()) + val exRegBTBResponse: BTBResp = Reg(new BTBResp) + val exRegFlushPipe: Bool = Reg(Bool()) + val exRegLoadUse: Bool = Reg(Bool()) + val exRegCause: UInt = Reg(UInt()) + val exRegReplay: Bool = Reg(Bool()) + val exRegPC: UInt = Reg(UInt()) + // TODO: add real width here. + val exRegMemSize: UInt = Reg(UInt()) + // Option.when(usingHypervisor) + val exRegHLS: Bool = Reg(Bool()) + val exRegInstruction: UInt = Reg(Bits()) + val exRegRawInstruction: UInt = Reg(UInt()) + // TODO: what's this? 
+ val exRegWphit: Vec[Bool] = Reg(Vec(nBreakpoints, Bool())) + val exRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + + val memRegExceptionInterrupt = Reg(Bool()) + val memRegValid = Reg(Bool()) + val memRegRVC = Reg(Bool()) + val memRegBTBResponse = Reg(new BTBResp) + val memRegException = Reg(Bool()) + val memRegReplay = Reg(Bool()) + val memRegFlushPipe = Reg(Bool()) + val memRegCause = Reg(UInt()) + val memRegSlowBypass = Reg(Bool()) + val memRegLoad = Reg(Bool()) + val memRegStore = Reg(Bool()) + val memRegSfence = Reg(Bool()) + val memRegPc = Reg(UInt()) + val memRegInstruction = Reg(Bits()) + val memRegMemSize = Reg(UInt()) + val memRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + + /** virtualization mode? */ + val memRegHlsOrDv = Reg(Bool()) + val memRegRawInstruction = Reg(UInt()) + val memRegWdata = Reg(Bits()) + val memRegRS2 = Reg(Bits()) + val memBranchTaken = Reg(Bool()) + val takePcMem = Wire(Bool()) + val memRegWphit = Reg(Vec(nBreakpoints, Bool())) + + val wbRegValid = Reg(Bool()) + val wbRegException = Reg(Bool()) + val wbRegReplay = Reg(Bool()) + val wbRegFlushPipe = Reg(Bool()) + val wbRegCause = Reg(UInt()) + val wbRegSfence = Reg(Bool()) + val wbRegPc = Reg(UInt()) + val wbRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + val wbRegMemSize = Reg(UInt()) + val wbRegHlsOrDv = Reg(Bool()) + val wbRegHfenceV = Reg(Bool()) + val wbRegHfenceG = Reg(Bool()) + val wbRegInstruction = Reg(Bits()) + val wbRegRawInstruction = Reg(UInt()) + val wbRegWdata = Reg(Bits()) + val wbRegRS2 = Reg(Bits()) + val wbRegWphit = Reg(Vec(nBreakpoints, Bool())) + val takePcWb = Wire(Bool()) + + val takePcMemWb = takePcWb || takePcMem + val takePc = takePcMemWb + + // From IBUF to ID + instructionBuffer.io.imem <> imem.resp + val instructionBufferOut: DecoupledIO[Instruction] = instructionBuffer.io.inst.head + // TODO: does these really has its meaning? 
I don't think so:( + val idExpandedInstruction: ExpandedInstruction = instructionBufferOut.bits.inst + val idRawInstruction: UInt = instructionBufferOut.bits.raw + val idInstruction: UInt = idExpandedInstruction.bits + idDecodeOutput := decoderModule.output + instructionBuffer.io.kill := takePc + // 5. Instruction goes to Rocket Decoder + decoderModule.instruction := idInstruction + + // Optional circuit: Optional add this circuit for RVE. + def decodeReg(x: UInt): (Bool, UInt) = (x.extract(x.getWidth - 1, lgNXRegs).asBool, x(lgNXRegs - 1, 0)) + val (idRaddr3Illegal: Bool, idRaddr3: UInt) = decodeReg(idExpandedInstruction.rs3) + val (idRaddr2Illegal: Bool, idRaddr2: UInt) = decodeReg(idExpandedInstruction.rs2) + val (idRaddr1Illegal: Bool, idRaddr1: UInt) = decodeReg(idExpandedInstruction.rs1) + val (idWaddrIllegal: Bool, idWaddr: UInt) = decodeReg(idExpandedInstruction.rd) + + val idLoadUse: Bool = Wire(Bool()) + val idRegFence: Bool = RegInit(false.B) + // TODO: T1 needs to access RS1 and RS2 under some instructions. + // FP goes to a different path, decoder.rfs1 is never used... + val idRen: Seq[Bool] = IndexedSeq(idDecodeOutput(decoder.rxs1), idDecodeOutput(decoder.rxs2)) + val idRaddr: Seq[UInt] = IndexedSeq(idRaddr1, idRaddr2) + // 6. Read RF out. + val idRs: Seq[UInt] = idRaddr.map(rf.read) + // instruction get killed at exec stage if true. + val ctrlKilled: Bool = Wire(Bool()) + + // TODO: additional decode out? 
+ val idCsrEn: Bool = idDecodeOutput(decoder.csr).isOneOf(CSR.S, CSR.C, CSR.W) + val idSystemInstruction: Bool = idDecodeOutput(decoder.csr) === CSR.I + val idCsrRen: Bool = idDecodeOutput(decoder.csr).isOneOf(CSR.S, CSR.C) && idExpandedInstruction.rs1 === 0.U + val idCsr = + Mux(idSystemInstruction && idDecodeOutput(decoder.mem), CSR.N, Mux(idCsrRen, CSR.R, idDecodeOutput(decoder.csr))) + val idCsrFlush = + idSystemInstruction || + (idCsrEn && !idCsrRen && csr.io.decode(0).writeFlush) || + Option.when(usingVector)(idDecodeOutput(decoder.vectorCSR)).getOrElse(false.B) + val idRfIllegal: Bool = + idRaddr2Illegal && idDecodeOutput(decoder.rxs2) || + idRaddr1Illegal && idDecodeOutput(decoder.rxs1) || + idWaddrIllegal && idDecodeOutput(decoder.wxd) + val idCsrIllegalRW: Bool = + idCsrEn && (csr.io.decode(0).readIllegal || !idCsrRen && csr.io.decode(0).writeIllegal) + val idSystemIllegal: Bool = + !instructionBufferOut.bits.rvc && (idSystemInstruction && csr.io.decode(0).systemIllegal) + + val idAtomicIllegal: Option[Bool] = + Option.when(usingAtomics)(idDecodeOutput(decoder.amo) && !csr.io.status.isa('a' - 'a')) + val idMulDivIllegal: Option[Bool] = + Option.when(usingMulDiv)( + Option.when(pipelinedMul)(idDecodeOutput(decoder.mul)).getOrElse(false.B) || + idDecodeOutput(decoder.div) && !csr.io.status.isa('m' - 'a') + ) + val idCompressIllegal: Option[Bool] = + Option.when(usingCompressed)(instructionBufferOut.bits.rvc && !csr.io.status.isa('c' - 'a')) + val idFpIllegal: Option[Bool] = + fpu.map(fpu => idDecodeOutput(decoder.fp) && (csr.io.decode(0).fpIllegal || fpu.illegal_rm)) + val idDpIllegal: Option[Bool] = Option.when(usingFPU)(idDecodeOutput(decoder.dp) && !csr.io.status.isa('d' - 'a')) + + // TODO: vector illegal: + // - vector is not enabled but a vector instruction is decoded. 
+ val idIllegalInstruction: Bool = + !idDecodeOutput(decoder.isLegal) || + idRfIllegal || + idCsrIllegalRW || + idSystemIllegal || + idMulDivIllegal.getOrElse(false.B) || + idAtomicIllegal.getOrElse(false.B) || + idFpIllegal.getOrElse(false.B) || + idDpIllegal.getOrElse(false.B) || + idCompressIllegal.getOrElse(false.B) + val idVirtualInstruction: Bool = + idDecodeOutput(decoder.isLegal) && + ( + (idCsrEn && + !(!idCsrRen && csr.io.decode(0).writeIllegal) && + csr.io.decode(0).virtualAccessIllegal) || ( + !instructionBufferOut.bits.rvc && + idSystemInstruction && + csr.io.decode(0).virtualSystemIllegal + ) + ) + + // stall decode for fences (now, for AMO.rl; later, for AMO.aq and FENCE) + val idAmoAquire: Bool = idInstruction(26) + val idAmoRelease: Bool = idInstruction(25) + // TODO: what's this? + val idFenceSucc: UInt = idInstruction(23, 20) + val idFenceNext: Bool = idDecodeOutput(decoder.fence) || idDecodeOutput(decoder.amo) && idAmoAquire + val idMemoryBusy: Bool = !dmem.ordered || dmem.req.valid + val idDoFence = + idMemoryBusy && + (idDecodeOutput(decoder.amo) && idAmoRelease || + idDecodeOutput(decoder.fenceI) || + idRegFence && idDecodeOutput(decoder.mem)) + + // TODO: if vector is non-empty, don't take breakpoint. 
+ breakpointUnit.io.status := csr.io.status + breakpointUnit.io.bp := csr.io.bp + breakpointUnit.io.pc := instructionBuffer.io.pc + breakpointUnit.io.ea := memRegWdata + breakpointUnit.io.mcontext := csr.io.mcontext + breakpointUnit.io.scontext := csr.io.scontext + + val idException0 = instructionBufferOut.bits.xcpt0 + val idException1 = instructionBufferOut.bits.xcpt1 + val (idException, idCause) = checkExceptions( + List( + (csr.io.interrupt, csr.io.interruptCause), + (breakpointUnit.io.debug_if, CSR.debugTriggerCause.U), + (breakpointUnit.io.xcpt_if, Causes.breakpoint.U), + (idException0.pf.inst, Causes.fetch_page_fault.U), + (idException0.gf.inst, Causes.fetch_guest_page_fault.U), + (idException0.ae.inst, Causes.fetch_access.U), + (idException1.pf.inst, Causes.fetch_page_fault.U), + (idException1.gf.inst, Causes.fetch_guest_page_fault.U), + (idException1.ae.inst, Causes.fetch_access.U), + (idVirtualInstruction, Causes.virtual_instruction.U), + (idIllegalInstruction, Causes.illegal_instruction.U) + ) + ) + + val idCoverCauses: Seq[(Int, String)] = List( + (CSR.debugTriggerCause, "DEBUG_TRIGGER"), + (Causes.breakpoint, "BREAKPOINT"), + (Causes.fetch_access, "FETCH_ACCESS"), + (Causes.illegal_instruction, "ILLEGAL_INSTRUCTION") + ) ++ Option.when(usingVM)((Causes.fetch_page_fault, "FETCH_PAGE_FAULT")) + // TODO: move it to verification module. 
+ coverExceptions(idException, idCause, "DECODE", idCoverCauses) + + // Bypass signals + val dcacheBypassData: UInt = + if (fastLoadByte) dmem.resp.bits.data(xLen - 1, 0) + else if (fastLoadWord) dmem.resp.bits.data_word_bypass(xLen - 1, 0) + else wbRegWdata + // detect bypass opportunities + val exWaddr: UInt = exRegInstruction(11, 7) & regAddrMask.U + val memWaddr: UInt = memRegInstruction(11, 7) & regAddrMask.U + val wbWaddr: UInt = wbRegInstruction(11, 7) & regAddrMask.U + val bypassSources: Seq[(Bool, UInt, UInt)] = IndexedSeq( + (true.B, 0.U, 0.U), // treat reading x0 as a bypass + (exRegValid && exRegDecodeOutput(decoder.wxd), exWaddr, memRegWdata), + (memRegValid && memRegDecodeOutput(decoder.wxd) && !memRegDecodeOutput(decoder.mem), memWaddr, wbRegWdata), + (memRegValid && memRegDecodeOutput(decoder.wxd), memWaddr, dcacheBypassData) + ) + val idBypassSources: Seq[Seq[Bool]] = idRaddr.map(raddr => bypassSources.map(s => s._1 && s._2 === raddr)) + + // execute stage + val bypassMux: Seq[UInt] = bypassSources.map(_._3) + val exRegRsBypass: Vec[Bool] = Reg(Vec(idRaddr.size, Bool())) + val exRegRsLSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt(log2Ceil(bypassSources.size).W))) + val exRegRsMSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt())) + val exRs: Seq[UInt] = Seq.tabulate(idRaddr.size)(i => + Mux(exRegRsBypass(i), bypassMux(exRegRsLSB(i)), Cat(exRegRsMSB(i), exRegRsLSB(i))) + ) + val exImm: SInt = ImmGen(exRegDecodeOutput(decoder.selImm), exRegInstruction) + val exOp1: SInt = + MuxLookup(exRegDecodeOutput(decoder.selAlu1), 0.S)(Seq(A1_RS1 -> exRs(0).asSInt, A1_PC -> exRegPC.asSInt)) + val exOp2: SInt = MuxLookup(exRegDecodeOutput(decoder.selAlu2), 0.S)( + Seq(A2_RS2 -> exRs(1).asSInt, A2_IMM -> exImm, A2_SIZE -> Mux(exRegRVC, 2.S, 4.S)) + ) + + arithmeticLogicUnit.io.dw := exRegDecodeOutput(decoder.aluDoubleWords) + arithmeticLogicUnit.io.fn := exRegDecodeOutput(decoder.aluFn) + arithmeticLogicUnit.io.in2 := exOp2.asUInt + arithmeticLogicUnit.io.in1 := 
exOp1.asUInt + + // multiplier and divider + // TODO: waive them if !usingMulDiv + muldiv.io.req.valid := exRegValid && Option.when(usingMulDiv)(exRegDecodeOutput(decoder.div)).getOrElse(false.B) + muldiv.io.req.bits.dw := exRegDecodeOutput(decoder.aluDoubleWords) + muldiv.io.req.bits.fn := exRegDecodeOutput(decoder.aluFn) + muldiv.io.req.bits.in1 := exRs(0) + muldiv.io.req.bits.in2 := exRs(1) + muldiv.io.req.bits.tag := exWaddr + mul.foreach { m => + m.io.req.valid := exRegValid && exRegDecodeOutput(decoder.mul) + m.io.req.bits := muldiv.io.req.bits + } + + exRegValid := !ctrlKilled + exRegReplay := !takePc && instructionBufferOut.valid && instructionBufferOut.bits.replay + exRegException := !ctrlKilled && idException + exRegExceptionInterrupt := !takePc && instructionBufferOut.valid && csr.io.interrupt + + // ID goes to EX + when(!ctrlKilled) { + exRegDecodeOutput := idDecodeOutput + exRegRVC := instructionBufferOut.bits.rvc + exRegDecodeOutput(decoder.csr) := idCsr + when(idDecodeOutput(decoder.fence) && idFenceSucc === 0.U) { idRegPause := true.B } + when(idFenceNext) { idRegFence := true.B } + when(idException) { // pass PC down ALU writeback pipeline for badaddr + exRegDecodeOutput(decoder.aluFn) := aluFn.FN_ADD + exRegDecodeOutput(decoder.aluDoubleWords) := DW_XPR + exRegDecodeOutput(decoder.selAlu1) := A1_RS1 // badaddr := instruction + exRegDecodeOutput(decoder.selAlu2) := A2_ZERO + when(idException1.asUInt.orR) { // badaddr := PC+2 + exRegDecodeOutput(decoder.selAlu1) := A1_PC + exRegDecodeOutput(decoder.selAlu2) := A2_SIZE + exRegRVC := true.B + } + when(breakpointUnit.io.xcpt_if || idException0.asUInt.orR) { // badaddr := PC + exRegDecodeOutput(decoder.selAlu1) := A1_PC + exRegDecodeOutput(decoder.selAlu2) := A2_ZERO + } + } + exRegFlushPipe := idDecodeOutput(decoder.fenceI) || idCsrFlush + exRegLoadUse := idLoadUse + exRegHLS := + usingHypervisor.B && + idSystemInstruction && + idDecodeOutput(decoder.memCommand).isOneOf(M_XRD, M_XWR, M_HLVX) + 
exRegMemSize := Mux(usingHypervisor.B && idSystemInstruction, idInstruction(27, 26), idInstruction(13, 12)) + when(idDecodeOutput(decoder.memCommand).isOneOf(M_SFENCE, M_HFENCEV, M_HFENCEG, M_FLUSH_ALL)) { + exRegMemSize := Cat(idRaddr2 =/= 0.U, idRaddr1 =/= 0.U) + } + when(idDecodeOutput(decoder.memCommand) === M_SFENCE && csr.io.status.v) { + exRegDecodeOutput(decoder.memCommand) := M_HFENCEV + } + + if (flushOnFenceI) { + when(idDecodeOutput(decoder.fenceI)) { + exRegMemSize := 0.U + } + } + + Seq.tabulate(idRaddr.size) { i => + val doBypass = idBypassSources(i).reduce(_ || _) + val bypassSource = PriorityEncoder(idBypassSources(i)) + exRegRsBypass(i) := doBypass + exRegRsLSB(i) := bypassSource + when(idRen(i) && !doBypass) { + exRegRsLSB(i) := idRs(i)(log2Ceil(bypassSources.size) - 1, 0) + exRegRsMSB(i) := idRs(i) >> log2Ceil(bypassSources.size) + } + } + when(idIllegalInstruction || idVirtualInstruction) { + val inst = Mux(instructionBufferOut.bits.rvc, idRawInstruction(15, 0), idRawInstruction) + exRegRsBypass(0) := false.B + exRegRsLSB(0) := inst(log2Ceil(bypassSources.size) - 1, 0) + exRegRsMSB(0) := inst >> log2Ceil(bypassSources.size) + } + } + // ID goes to EX but with interrupt or replay + when(!ctrlKilled || csr.io.interrupt || instructionBufferOut.bits.replay) { + exRegCause := idCause + exRegInstruction := idInstruction + exRegRawInstruction := idRawInstruction + exRegPC := instructionBuffer.io.pc + exRegBTBResponse := instructionBuffer.io.btb_resp + exRegWphit := breakpointUnit.io.bpwatch.map { bpw => bpw.ivalid(0) } + } + // replay inst in ex stage? 
+ val exPcValid: Bool = exRegValid || exRegReplay || exRegExceptionInterrupt + val wbDcacheMiss: Bool = wbRegDecodeOutput(decoder.mem) && !dmem.resp.valid + val replayExStructural: Bool = exRegDecodeOutput(decoder.mem) && !dmem.req.ready || Option + .when(usingMulDiv)(exRegDecodeOutput(decoder.div)) + .getOrElse(false.B) && !muldiv.io.req.ready + val replayExLoadUse: Bool = wbDcacheMiss && exRegLoadUse + val replayEx: Bool = exRegReplay || (exRegValid && (replayExStructural || replayExLoadUse)) + val ctrlKillx: Bool = takePcMemWb || replayEx || !exRegValid + // detect 2-cycle load-use delay for LB/LH/SC + val exSlowBypass: Bool = exRegDecodeOutput(decoder.memCommand) === M_XSC || exRegMemSize < 2.U + val exSfence: Bool = + usingVM.B && + exRegDecodeOutput(decoder.mem) && + (exRegDecodeOutput(decoder.memCommand) === M_SFENCE || + exRegDecodeOutput(decoder.memCommand) === M_HFENCEV || + exRegDecodeOutput(decoder.memCommand) === M_HFENCEG) + + val (exException: Bool, exCause: UInt) = checkExceptions( + List((exRegExceptionInterrupt || exRegException, exRegCause)) + ) + val exCoverCauses: Seq[(Int, String)] = idCoverCauses + coverExceptions(exException, exCause, "EXECUTE", exCoverCauses) + + // memory stage + val memPcValid: Bool = memRegValid || memRegReplay || memRegExceptionInterrupt + val memBranchTarget: SInt = memRegPc.asSInt + + Mux( + memRegDecodeOutput(decoder.isBranch) && memBranchTaken, + ImmGen(IMM_SB, memRegInstruction), + Mux(memRegDecodeOutput(decoder.isJal), ImmGen(IMM_UJ, memRegInstruction), Mux(memRegRVC, 2.S, 4.S)) + ) + val memNextPC: UInt = (Mux( + memRegDecodeOutput(decoder.isJalr) || memRegSfence, + encodeVirtualAddress(memRegWdata, memRegWdata).asSInt, + memBranchTarget + ) & (-2).S).asUInt + val memWrongNpc: Bool = + Mux( + exPcValid, + memNextPC =/= exRegPC, + Mux( + instructionBufferOut.valid || instructionBuffer.io.imem.valid, + memNextPC =/= instructionBuffer.io.pc, + true.B + ) + ) + val memNpcMisaligned: Bool = !csr.io.status.isa('c' - 
'a') && memNextPC(1) && !memRegSfence + val memIntWdata: UInt = Mux( + !memRegException && (memRegDecodeOutput(decoder.isJalr) ^ memNpcMisaligned), + memBranchTarget, + memRegWdata.asSInt + ).asUInt + val memCfi: Bool = + memRegDecodeOutput(decoder.isBranch) || memRegDecodeOutput(decoder.isJalr) || memRegDecodeOutput(decoder.isJal) + val memCfiTaken: Bool = + (memRegDecodeOutput(decoder.isBranch) && memBranchTaken) || memRegDecodeOutput( + decoder.isJalr + ) || memRegDecodeOutput(decoder.isJal) + val memDirectionMisprediction: Bool = + memRegDecodeOutput(decoder.isBranch) && memBranchTaken =/= (usingBTB.B && memRegBTBResponse.taken) + val memMisprediction: Bool = if (usingBTB) memWrongNpc else memCfiTaken + takePcMem := memRegValid && !memRegException && (memMisprediction || memRegSfence) + + memRegValid := !ctrlKillx + memRegReplay := !takePcMemWb && replayEx + memRegException := !ctrlKillx && exException + memRegExceptionInterrupt := !takePcMemWb && exRegExceptionInterrupt + + // on pipeline flushes, cause mem_npc to hold the sequential npc, which + // will drive the W-stage npc mux + when(memRegValid && memRegFlushPipe) { + memRegSfence := false.B + }.elsewhen(exPcValid) { + memRegDecodeOutput := exRegDecodeOutput + memRegRVC := exRegRVC + memRegLoad := exRegDecodeOutput(decoder.mem) && isRead(exRegDecodeOutput(decoder.memCommand)) + memRegStore := exRegDecodeOutput(decoder.mem) && isWrite(exRegDecodeOutput(decoder.memCommand)) + memRegSfence := exSfence + memRegBTBResponse := exRegBTBResponse + memRegFlushPipe := exRegFlushPipe + memRegSlowBypass := exSlowBypass + memRegWphit := exRegWphit + + memRegCause := exCause + memRegInstruction := exRegInstruction + memRegRawInstruction := exRegRawInstruction + memRegMemSize := exRegMemSize + memRegHlsOrDv := dmem.req.bits.dv + memRegPc := exRegPC + // IDecode ensured they are 1H + memRegWdata := arithmeticLogicUnit.io.out + memBranchTaken := arithmeticLogicUnit.io.cmp_out + + when( + exRegDecodeOutput(decoder.rxs2) && 
(exRegDecodeOutput(decoder.mem) || exSfence) + ) { + val size = exRegMemSize + memRegRS2 := new StoreGen(size, 0.U, exRs(1), coreDataBytes).data + }.elsewhen(exRegDecodeOutput(decoder.rxs2) && exRegDecodeOutput(decoder.vector)) { + // for setvl + memRegRS2 := exRs(1) + } + when(exRegDecodeOutput(decoder.isJalr) && csr.io.status.debug) { + // flush I$ on D-mode JALR to effect uncached fetch without D$ flush + memRegDecodeOutput(decoder.fenceI) := true.B + memRegFlushPipe := true.B + } + } + + val memBreakpoint = (memRegLoad && breakpointUnit.io.xcpt_ld) || (memRegStore && breakpointUnit.io.xcpt_st) + val memDebugBreakpoint = (memRegLoad && breakpointUnit.io.debug_ld) || (memRegStore && breakpointUnit.io.debug_st) + val (memLoadStoreException, memLoadStoreCause) = checkExceptions( + List((memDebugBreakpoint, CSR.debugTriggerCause.U), (memBreakpoint, Causes.breakpoint.U)) + ) + + val (memException, memCause) = checkExceptions( + List( + (memRegExceptionInterrupt || memRegException, memRegCause), + (memRegValid && memNpcMisaligned, Causes.misaligned_fetch.U), + (memRegValid && memLoadStoreException, memLoadStoreCause) + ) + ) + + val memCoverCauses = (exCoverCauses ++ List( + (CSR.debugTriggerCause, "DEBUG_TRIGGER"), + (Causes.breakpoint, "BREAKPOINT"), + (Causes.misaligned_fetch, "MISALIGNED_FETCH") + )).distinct + coverExceptions(memException, memCause, "MEMORY", memCoverCauses) + + val dcacheKillMem = + memRegValid && memRegDecodeOutput(decoder.wxd) && dmem.replay_next // structural hazard on writeback port + // TODO: vectorKillMem? 
+ val fpuKillMem = fpu.map(fpu => memRegValid && memRegDecodeOutput(decoder.fp) && fpu.nack_mem) + val replayMem = dcacheKillMem || memRegReplay || fpuKillMem.getOrElse(false.B) + val killmCommon = dcacheKillMem || takePcWb || memRegException || !memRegValid + muldiv.io.kill := killmCommon && RegNext(muldiv.io.req.fire) + val ctrlKillm = killmCommon || memException || fpuKillMem.getOrElse(false.B) + + // writeback stage + wbRegValid := !ctrlKillm + wbRegReplay := replayMem && !takePcWb + wbRegException := memException && !takePcWb + wbRegFlushPipe := !ctrlKillm && memRegFlushPipe + when(memPcValid) { + wbRegDecodeOutput := memRegDecodeOutput + wbRegSfence := memRegSfence + wbRegWdata := fpu + .map(fpu => + Mux( + !memRegException && memRegDecodeOutput(decoder.fp) && memRegDecodeOutput(decoder.wxd), + fpu.toint_data, + memIntWdata + ) + ) + .getOrElse(memIntWdata) + when(memRegSfence || memRegDecodeOutput(decoder.vector)) { + wbRegRS2 := memRegRS2 + } + wbRegCause := memCause + wbRegInstruction := memRegInstruction + wbRegRawInstruction := memRegRawInstruction + wbRegMemSize := memRegMemSize + wbRegHlsOrDv := memRegHlsOrDv + wbRegHfenceV := memRegDecodeOutput(decoder.memCommand) === M_HFENCEV + wbRegHfenceG := memRegDecodeOutput(decoder.memCommand) === M_HFENCEG + wbRegPc := memRegPc + wbRegWphit := memRegWphit | breakpointUnit.io.bpwatch.map { bpw => + (bpw.rvalid(0) && memRegLoad) || (bpw.wvalid(0) && memRegStore) + } + + } + + val (wbException, wbCause) = checkExceptions( + List( + (wbRegException, wbRegCause), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.pf.st, Causes.store_page_fault.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.pf.ld, Causes.load_page_fault.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.gf.st, Causes.store_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.gf.ld, Causes.load_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && 
dmem.s2_xcpt.ae.st, Causes.store_access.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ae.ld, Causes.load_access.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ma.st, Causes.misaligned_store.U), + (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ma.ld, Causes.misaligned_load.U) + ) + ) + + val wbCoverCauses = Seq( + (Causes.misaligned_store, "MISALIGNED_STORE"), + (Causes.misaligned_load, "MISALIGNED_LOAD"), + (Causes.store_access, "STORE_ACCESS"), + (Causes.load_access, "LOAD_ACCESS") + ) ++ + Option + .when(usingVM)( + Seq( + (Causes.store_page_fault, "STORE_PAGE_FAULT"), + (Causes.load_page_fault, "LOAD_PAGE_FAULT") + ) + ) + .getOrElse(Seq()) ++ + Option + .when(usingHypervisor)( + Seq( + (Causes.store_guest_page_fault, "STORE_GUEST_PAGE_FAULT"), + (Causes.load_guest_page_fault, "LOAD_GUEST_PAGE_FAULT") + ) + ) + .getOrElse(Seq()) + coverExceptions(wbException, wbCause, "WRITEBACK", wbCoverCauses) + + val wbPcValid: Bool = wbRegValid || wbRegReplay || wbRegException + val wbWxd: Bool = wbRegValid && wbRegDecodeOutput(decoder.wxd) + val wbSetSboard: Bool = + wbDcacheMiss || + Option.when(usingMulDiv)(wbRegDecodeOutput(decoder.div)).getOrElse(false.B) || + Option + .when(usingVector) { + // 8. 
set Int scoreboard + wbRegDecodeOutput(decoder.wxd) && wbRegDecodeOutput(decoder.vector) && !wbRegDecodeOutput(decoder.vectorCSR) + } + .getOrElse(false.B) + val replayWbCommon: Bool = dmem.s2_nack || wbRegReplay + val replayWbCsr: Bool = wbRegValid && csr.io.rwStall + val replayWb: Bool = replayWbCommon || replayWbCsr + takePcWb := replayWb || wbException || csr.io.eret || wbRegFlushPipe + + // writeback arbitration + val dmemResponseXpu: Bool = !dmem.resp.bits.tag(0).asBool + val dmemResponseFpu: Bool = dmem.resp.bits.tag(0).asBool + val dmemResponseWaddr: UInt = dmem.resp.bits.tag(5, 1) + val dmemResponseValid: Bool = dmem.resp.valid && dmem.resp.bits.has_data + val dmemResponseReplay: Bool = dmemResponseValid && dmem.resp.bits.replay + + muldiv.io.resp.ready := !wbWxd + val longlatencyWdata: UInt = WireDefault(muldiv.io.resp.bits.data) + val longlatencyWaddress: UInt = WireDefault(muldiv.io.resp.bits.tag) + val longLatencyWenable: Bool = WireDefault(muldiv.io.resp.fire) + + when(dmemResponseReplay && dmemResponseXpu) { + muldiv.io.resp.ready := false.B + longlatencyWaddress := dmemResponseWaddr + longLatencyWenable := true.B + } + + val wbValid = wbRegValid && !replayWb && !wbException + val wbWen = wbValid && wbRegDecodeOutput(decoder.wxd) + // RF is at WB stage + val rfWen = wbWen || longLatencyWenable + val rfWaddr = Mux(longLatencyWenable, longlatencyWaddress, wbWaddr) + val rfWdata = Mux( + dmemResponseValid && dmemResponseXpu, + dmem.resp.bits.data(xLen - 1, 0), + Mux( + longLatencyWenable, + longlatencyWdata, + Mux( + (wbRegDecodeOutput(decoder.csr) =/= CSR.N) || wbRegDecodeOutput(decoder.vectorCSR), + csr.io.rw.rdata, + Mux( + Option.when(usingMulDiv && pipelinedMul)(wbRegDecodeOutput(decoder.mul)).getOrElse(false.B), + mul.map(_.io.resp.bits.data).getOrElse(wbRegWdata), + wbRegWdata + ) + ) + ) + ) + when(rfWen) { rf.write(rfWaddr, rfWdata) } + + // hook up control/status regfile + csr.io.ungatedClock := clock + csr.io.decode(0).inst := idInstruction + 
csr.io.exception := wbException + csr.io.cause := wbCause + csr.io.retire := wbValid + csr.io.inst(0) := ( + if (usingCompressed) + Cat(Mux(wbRegRawInstruction(1, 0).andR, wbRegInstruction >> 16, 0.U), wbRegRawInstruction(15, 0)) + else wbRegInstruction + ) + csr.io.interrupts := interrupts + csr.io.hartid := hartid + fpu.map { fpu => + fpu.fcsr_rm := csr.io.fcsrRm + csr.io.fcsrFlags := fpu.fcsr_flags + fpu.time := csr.io.time(31, 0) + fpu.hartid := hartid + }.getOrElse { + csr.io.fcsrFlags := DontCare + } + csr.io.pc := wbRegPc + val tvalDmemAddr = !wbRegException + val tvalAnyAddr = tvalDmemAddr || + wbRegCause.isOneOf( + Causes.breakpoint.U, + Causes.fetch_access.U, + Causes.fetch_page_fault.U, + Causes.fetch_guest_page_fault.U + ) + val tvalInstruction = wbRegCause === Causes.illegal_instruction.U + val tvalValid = wbException && (tvalAnyAddr || tvalInstruction) + csr.io.gva := wbException && (tvalAnyAddr && csr.io.status.v || tvalDmemAddr && wbRegHlsOrDv) + csr.io.tval := Mux(tvalValid, encodeVirtualAddress(wbRegWdata, wbRegWdata), 0.U) + csr.io.htval := { + val htvalValidImem = wbRegException && wbRegCause === Causes.fetch_guest_page_fault.U + val htvalImem = Mux(htvalValidImem, imem.gpa.bits, 0.U) + assert(!htvalValidImem || imem.gpa.valid) + + val htvalValidDmem = + wbException && tvalDmemAddr && dmem.s2_xcpt.gf.asUInt.orR && !dmem.s2_xcpt.pf.asUInt.orR + val htvalDmem = Mux(htvalValidDmem, dmem.s2_gpa, 0.U) + + (htvalDmem | htvalImem) >> hypervisorExtraAddrBits + } + ptw.ptbr := csr.io.ptbr + ptw.hgatp := csr.io.hgatp + ptw.vsatp := csr.io.vsatp + ptw.customCSRs.csrs.zip(csr.io.customCSRs).foreach { case (lhs, rhs) => lhs <> rhs } + ptw.status := csr.io.status + ptw.hstatus := csr.io.hstatus + ptw.gstatus := csr.io.gstatus + ptw.pmp := csr.io.pmp + csr.io.rw.addr := wbRegInstruction(31, 20) + csr.io.rw.cmd := CSR.maskCmd(wbRegValid, wbRegDecodeOutput(decoder.csr)) + csr.io.rw.wdata := wbRegWdata + csr.io.vectorCsr.foreach(_ := 
wbRegDecodeOutput(decoder.vectorCSR)) + csr.io.wbRegRS2.foreach(_ := wbRegRS2) + + bpwatch.zip(wbRegWphit).zip(csr.io.bp) + bpwatch.lazyZip(wbRegWphit).lazyZip(csr.io.bp).foreach { + case (iobpw, wphit, bp) => + iobpw.valid(0) := wphit + iobpw.action := bp.control.action + // tie off bpwatch valids + iobpw.rvalid.foreach(_ := false.B) + iobpw.wvalid.foreach(_ := false.B) + iobpw.ivalid.foreach(_ := false.B) + } + + val hazardTargets = Seq( + (idDecodeOutput(decoder.rxs1) && idRaddr1 =/= 0.U, idRaddr1), + (idDecodeOutput(decoder.rxs2) && idRaddr2 =/= 0.U, idRaddr2), + (idDecodeOutput(decoder.wxd) && idWaddr =/= 0.U, idWaddr) + ) + val fpHazardTargets = fpu.map(fpu => + Seq( + (fpu.dec.ren1, idRaddr1), + (fpu.dec.ren2, idRaddr2), + (fpu.dec.ren3, idRaddr3), + (fpu.dec.wen, idWaddr) + ) + ) + + val scoreboard: Scoreboard = new Scoreboard(32, true) + scoreboard.clear(longLatencyWenable, longlatencyWaddress) + def idScoreboardClearBypass(r: UInt): Bool = { + // ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check + if (tileParams.dcache.get.dataECC.isEmpty) longLatencyWenable && longlatencyWaddress === r + else + muldiv.io.resp.fire && muldiv.io.resp.bits.tag === r || dmemResponseReplay && dmemResponseXpu && dmemResponseWaddr === r + } + val idScoreboardHazard: Bool = + checkHazards(hazardTargets, rd => scoreboard.read(rd) && !idScoreboardClearBypass(rd)) + scoreboard.set(wbSetSboard && wbWen, wbWaddr) + + // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
+ val exCannotBypass: Bool = + exRegDecodeOutput(decoder.csr) =/= CSR.N || + exRegDecodeOutput(decoder.isJalr) || + exRegDecodeOutput(decoder.mem) || + Option.when(usingMulDiv && pipelinedMul)(exRegDecodeOutput(decoder.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(exRegDecodeOutput(decoder.div)).getOrElse(false.B) || + Option.when(usingFPU)(exRegDecodeOutput(decoder.fp)).getOrElse(false.B) + val dataHazardEx: Bool = exRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === exWaddr) + val fpDataHazardEx: Option[Bool] = fpHazardTargets.map(fpHazardTargets => + idDecodeOutput(decoder.fp) && exRegDecodeOutput(decoder.wfd) && checkHazards(fpHazardTargets, _ === exWaddr) + ) + val idExHazard: Bool = exRegValid && (dataHazardEx && exCannotBypass || fpDataHazardEx.getOrElse(false.B)) + + // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. + // TODO: what's BH? + val memMemCmdBh: Bool = + if (fastLoadWord) (!fastLoadByte).B && memRegSlowBypass + else true.B + val memCannotBypass: Bool = + memRegDecodeOutput(decoder.csr) =/= CSR.N || + memRegDecodeOutput(decoder.mem) && memMemCmdBh || + Option.when(usingMulDiv && pipelinedMul)(memRegDecodeOutput(decoder.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(memRegDecodeOutput(decoder.div)).getOrElse(false.B) || + Option.when(usingFPU)(memRegDecodeOutput(decoder.fp)).getOrElse(false.B) + val dataHazardMem: Bool = memRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === memWaddr) + val fpDataHazardMem: Option[Bool] = fpHazardTargets.map(fpHazardTargets => + idDecodeOutput(decoder.fp) && + memRegDecodeOutput(decoder.wfd) && + checkHazards(fpHazardTargets, _ === memWaddr) + ) + val idMemHazard: Bool = memRegValid && (dataHazardMem && memCannotBypass || fpDataHazardMem.getOrElse(false.B)) + idLoadUse := memRegValid && dataHazardMem && memRegDecodeOutput(decoder.mem) + // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
+ val dataHazardWb: Bool = wbRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === wbWaddr) + val fpDataHazardWb: Bool = fpHazardTargets + .map(fpHazardTargets => + idDecodeOutput(decoder.fp) && + wbRegDecodeOutput(decoder.wfd) && + checkHazards(fpHazardTargets, _ === wbWaddr) + ) + .getOrElse(false.B) + val idWbHazard: Bool = wbRegValid && (dataHazardWb && wbSetSboard || fpDataHazardWb) + val idStallFpu: Bool = + fpu + .zip(fpHazardTargets) + .map { + case (fpu, fpHazardTargets) => + val fpScoreboard = new Scoreboard(32) + // 8. set FP scoreboard + fpScoreboard.set(((wbDcacheMiss || wbRegDecodeOutput(decoder.vector)) && wbRegDecodeOutput(decoder.wfd) || fpu.sboard_set) && wbValid, wbWaddr) + fpScoreboard.clear(dmemResponseReplay && dmemResponseFpu, dmemResponseWaddr) + t1Response.foreach { response => + val vectorTryToWriteFP = response.bits.rd.valid && response.bits.float + fpScoreboard.clear(response.fire && vectorTryToWriteFP, response.bits.rd.bits) + } + fpScoreboard.clear(fpu.sboard_clr, fpu.sboard_clra) + checkHazards(fpHazardTargets, fpScoreboard.read) + } + .getOrElse(false.B) + + val dcacheBlocked: Bool = { + // speculate that a blocked D$ will unblock the cycle after a Grant + val blocked = Reg(Bool()) + blocked := !dmem.req.ready && dmem.clock_enabled && !dmem.perf.grant && (blocked || dmem.req.valid || dmem.s2_nack) + blocked && !dmem.perf.grant + } + + // vector stall + val vectorLSUEmpty: Option[Bool] = Option.when(usingVector)(Wire(Bool())) + val vectorQueueFull: Option[Bool] = Option.when(usingVector)(Wire(Bool())) + val vectorStall: Option[Bool] = Option.when(usingVector) { + val vectorLSUNotClear = + (exRegValid && exRegDecodeOutput(decoder.vectorLSU)) || + (memRegValid && memRegDecodeOutput(decoder.vectorLSU)) || + (wbRegValid && wbRegDecodeOutput(decoder.vectorLSU)) || + !vectorLSUEmpty.get + // Vector instruction queue is full + // TODO: need cover. 
+ (idDecodeOutput(decoder.vector) && vectorQueueFull.get) || + // There is an outstanding LSU. + (idDecodeOutput(decoder.mem) && !idDecodeOutput(decoder.vector) && vectorLSUNotClear) + } + + // TODO: vector stall + val ctrlStalld: Bool = + idExHazard || idMemHazard || idWbHazard || idScoreboardHazard || idDoFence || idRegPause || + csr.io.csrStall || csr.io.singleStep && (exRegValid || memRegValid || wbRegValid) || + idCsrEn && csr.io.decode(0).fpCsr && !fpu.map(_.fcsr_rdy).getOrElse(false.B) || traceStall || + !clockEnable || + Option.when(usingFPU)(idDecodeOutput(decoder.fp) && idStallFpu).getOrElse(false.B) || + idDecodeOutput(decoder.mem) && dcacheBlocked || // reduce activity during D$ misses + Option + .when(usingMulDiv)( + idDecodeOutput( + decoder.div + ) && (!(muldiv.io.req.ready || (muldiv.io.resp.valid && !wbWxd)) || muldiv.io.req.valid) + ) + .getOrElse(false.B) || // reduce odds of replay + // TODO: vectorStall is large, we may need it to gate the scalar core. + vectorStall.getOrElse(false.B) + + ctrlKilled := + // IBUF not bubble + !instructionBuffer.io.inst(0).valid || + // Miss + instructionBufferOut.bits.replay || + // flush + takePcMemWb || + // + ctrlStalld || + csr.io.interrupt + + imem.req.valid := takePc + imem.req.bits.speculative := !takePcWb + // flush or branch misprediction + imem.req.bits.pc := Mux( + wbException || csr.io.eret, + csr.io.evec, // exception or [m|s]ret + Mux( + replayWb, + wbRegPc, // replay + memNextPC + ) + ) + imem.flush_icache := wbRegValid && wbRegDecodeOutput(decoder.fenceI) && !dmem.s2_nack + imem.might_request := { + imemMightRequestReg := exPcValid || memPcValid || ptw.customCSRs.disableICacheClockGate + imemMightRequestReg + } + imem.progress := RegNext(wbRegValid && !replayWbCommon) + imem.sfence.valid := wbRegValid && wbRegSfence + imem.sfence.bits.rs1 := wbRegMemSize(0) + imem.sfence.bits.rs2 := wbRegMemSize(1) + imem.sfence.bits.addr := wbRegWdata + imem.sfence.bits.asid := wbRegRS2 + imem.sfence.bits.hv := 
wbRegHfenceV + imem.sfence.bits.hg := wbRegHfenceG + ptw.sfence := imem.sfence + + instructionBufferOut.ready := !ctrlStalld + + imem.btb_update.valid := memRegValid && !takePcWb && memWrongNpc && (!memCfi || memCfiTaken) + imem.btb_update.bits.isValid := memCfi + imem.btb_update.bits.cfiType := + Mux( + (memRegDecodeOutput(decoder.isJal) || memRegDecodeOutput(decoder.isJalr)) && memWaddr(0), + CFIType.call, + Mux( + memRegDecodeOutput(decoder.isJalr) && (memRegInstruction(19, 15) & regAddrMask.U) === BitPat("b00?01"), + CFIType.ret, + Mux(memRegDecodeOutput(decoder.isJal) || memRegDecodeOutput(decoder.isJalr), CFIType.jump, CFIType.branch) + ) + ) + imem.btb_update.bits.target := imem.req.bits.pc + imem.btb_update.bits.br_pc := (if (usingCompressed) memRegPc + Mux(memRegRVC, 0.U, 2.U) else memRegPc) + imem.btb_update.bits.pc := ~(~imem.btb_update.bits.br_pc | (coreInstBytes * fetchWidth - 1).U) + imem.btb_update.bits.prediction := memRegBTBResponse + imem.btb_update.bits.taken := DontCare + + imem.bht_update.valid := memRegValid && !takePcWb + imem.bht_update.bits.pc := imem.btb_update.bits.pc + imem.bht_update.bits.taken := memBranchTaken + imem.bht_update.bits.mispredict := memWrongNpc + imem.bht_update.bits.branch := memRegDecodeOutput(decoder.isBranch) + imem.bht_update.bits.prediction := memRegBTBResponse.bht + + // Connect RAS in Frontend + imem.ras_update := DontCare + + fpu.foreach { fpu => + fpu.valid := !ctrlKilled && idDecodeOutput(decoder.fp) + fpu.killx := ctrlKillx + fpu.killm := killmCommon + fpu.inst := idInstruction + fpu.fromint_data := exRs(0) + fpu.dmem_resp_val := dmemResponseValid && dmemResponseFpu + fpu.dmem_resp_data := (if (minFLen == 32) dmem.resp.bits.data_word_bypass else dmem.resp.bits.data) + fpu.dmem_resp_type := dmem.resp.bits.size + fpu.dmem_resp_tag := dmemResponseWaddr + fpu.keep_clock_enabled := ptw.customCSRs.disableCoreClockGate + } + + t1Request.foreach { t1 => + // Send instruction to T1 when write back. 
+ t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(decoder.vector) + t1.bits.instruction := wbRegInstruction + t1.bits.rs1Data := wbRegWdata + t1.bits.rs2Data := wbRegRS2 + + val response: DecoupledIO[VectorResponse] = t1Response.get + + // TODO: make it configurable + val maxCount: Int = 32 + val countWidth = log2Up(maxCount) + + def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { + val counter: UInt = RegInit(0.U(size.W)) + val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) + val updateCounter = grant ^ release + when(updateCounter) { + counter := nextCount + } + flush.foreach(f => when(f)(counter := 0.U)) + val empty = (updateCounter && nextCount === 0.U) || counter === 0.U + val fullCounter: Int = (1 << size) - 1 - margin + val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U + (empty, full) + } + // Maintain lsu counter + val lsuGrant: Bool = t1.valid && wbRegDecodeOutput(decoder.vectorLSU) + val lsuRelease: Bool = response.fire && response.bits.mem + val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease) + // Maintain vector counter + // There may be 4 instructions in the pipe + val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get) + vectorLSUEmpty.foreach(_ := lsuEmpty) + vectorQueueFull.foreach(_ := vectorFull) + } + // todo: vector change csr + t1Response.foreach { vectorResponse => + val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float + val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float + vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && + (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) + when(vectorResponse.fire && vectorTryToWriteRd) { + longlatencyWdata := vectorResponse.bits.data + longlatencyWaddress := vectorResponse.bits.rd.bits 
+ longLatencyWenable := true.B + } + fpu.foreach { fpu => + when(!(dmemResponseValid && dmemResponseFpu)) { + fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP + fpu.dmem_resp_data := vectorResponse.bits.data + // todo: 32 bit only + fpu.dmem_resp_type := 2.U + // todo: connect tag + fpu.dmem_resp_tag := 0.U + } + } + } + + dmem.req.valid := exRegValid && exRegDecodeOutput(decoder.mem) + val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(decoder.fp)).getOrElse(false.B)) + require(coreParams.dcacheReqTagBits >= ex_dcache_tag.getWidth) + dmem.req.bits.tag := ex_dcache_tag + dmem.req.bits.cmd := exRegDecodeOutput(decoder.memCommand) + dmem.req.bits.size := exRegMemSize + dmem.req.bits.signed := !Mux(exRegHLS, exRegInstruction(20), exRegInstruction(14)) + dmem.req.bits.phys := false.B + dmem.req.bits.addr := encodeVirtualAddress(exRs(0), arithmeticLogicUnit.io.adder_out) + dmem.req.bits.idx.foreach(_ := dmem.req.bits.addr) + dmem.req.bits.dprv := Mux(exRegHLS, csr.io.hstatus.spvp, csr.io.status.dprv) + dmem.req.bits.dv := exRegHLS || csr.io.status.dv + dmem.req.bits.no_alloc := DontCare + dmem.req.bits.no_xcpt := DontCare + dmem.req.bits.data := DontCare + dmem.req.bits.mask := DontCare + dmem.s1_data.data := fpu + .map(fpu => Mux(memRegDecodeOutput(decoder.fp), Fill(xLen.max(fLen) / fLen, fpu.store_data), memRegRS2)) + .getOrElse(memRegRS2) + dmem.s1_data.mask := DontCare + + dmem.s1_kill := killmCommon || memLoadStoreException || fpuKillMem.getOrElse(false.B) + dmem.s2_kill := false.B + // don't let D$ go to sleep if we're probably going to use it soon + dmem.keep_clock_enabled := instructionBufferOut.valid && idDecodeOutput(decoder.mem) && !csr.io.csrStall + + // gate the clock + val unpause: Bool = + csr.io.time(rocketParams.lgPauseCycles - 1, 0) === 0.U || csr.io.inhibitCycle || dmem.perf.release || takePc + when(unpause) { idRegPause := false.B } + cease := csr.io.status.cease && !clockEnableReg + wfi := csr.io.status.wfi + if 
(rocketParams.clockGate) { + longLatencyStall := csr.io.csrStall || dmem.perf.blocked || idRegPause && !unpause + clockEnable := clockEnableReg || exPcValid || (!longLatencyStall && imem.resp.valid) + clockEnableReg := + exPcValid || memPcValid || wbPcValid || // instruction in flight + ptw.customCSRs.disableCoreClockGate || // chicken bit + !muldiv.io.req.ready || // mul/div in flight + fpu.map(!_.fcsr_rdy).getOrElse(false.B) || // long-latency FPU in flight + dmem.replay_next || // long-latency load replaying + (!longLatencyStall && (instructionBufferOut.valid || imem.resp.valid)) // instruction pending + + assert(!(exPcValid || memPcValid || wbPcValid) || clockEnable) + } + + // evaluate performance counters + val icacheBlocked = !(imem.resp.valid || RegNext(imem.resp.valid)) + csr.io.counters.foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } + } + + def checkExceptions(x: Seq[(Bool, UInt)]) = + (x.map(_._1).reduce(_ || _), PriorityMux(x)) + + def coverExceptions( + exceptionValid: Bool, + cause: UInt, + labelPrefix: String, + coverCausesLabels: Seq[(Int, String)] + ): Unit = { + for ((coverCause, label) <- coverCausesLabels) { + property.cover(exceptionValid && (cause === coverCause.U), s"${labelPrefix}_${label}") + } + } + + def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = + targets.map(h => h._1 && cond(h._2)).reduce(_ || _) + + def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea + else { + // efficient means to compress 64-bit VA into vaddrBits+1 bits + // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) + val b = vaddrBitsExtended - 1 + val a = (a0 >> b).asSInt + val msb = Mux(a === 0.S || a === -1.S, ea(b), !ea(b - 1)) + Cat(msb, ea(b - 1, 0)) + } + + class Scoreboard(n: Int, zero: Boolean = false) { + def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def read(addr: UInt): Bool = r(addr) 
+ def readBypassed(addr: UInt): Bool = _next(addr) + + private val _r = RegInit(0.U(n.W)) + private val r = if (zero) (_r >> 1 << 1) else _r + private var _next = r + private var ens = false.B + private def mask(en: Bool, addr: UInt) = Mux(en, 1.U << addr, 0.U) + private def update(en: Bool, update: UInt) = { + _next = update + ens = ens || en + when(ens) { _r := _next } + } + } +} + +class RegFile(n: Int, w: Int, zero: Boolean = false) { + val rf = Mem(n, UInt(w.W)) + private def access(addr: UInt) = rf(~addr(log2Up(n) - 1, 0)) + private val reads = ArrayBuffer[(UInt, UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> Wire(UInt()) + reads.last._2 := Mux(zero.B && addr === 0.U, 0.U, access(addr)) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when(addr =/= 0.U) { + access(addr) := data + for ((raddr, rdata) <- reads) + when(addr === raddr) { rdata := data } + } + } +} + +object ImmGen { + def apply(sel: UInt, inst: UInt) = { + val sign = Mux(sel === IMM_Z, 0.S, inst(31).asSInt) + val b30_20 = Mux(sel === IMM_U, inst(30, 20).asSInt, sign) + val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19, 12).asSInt) + val b11 = Mux( + sel === IMM_U || sel === IMM_Z, + 0.S, + Mux(sel === IMM_UJ, inst(20).asSInt, Mux(sel === IMM_SB, inst(7).asSInt, sign)) + ) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, 0.U, inst(30, 25)) + val b4_1 = Mux( + sel === IMM_U, + 0.U, + Mux(sel === IMM_S || sel === IMM_SB, inst(11, 8), Mux(sel === IMM_Z, inst(19, 16), inst(24, 21))) + ) + val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Mux(sel === IMM_Z, inst(15), 0.U))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt + } +} From c0a8364bbc5d17523144fd41a8a1d9b2f9b4e806 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 3 Jul 2024 16:34:05 +0800 Subject: [PATCH 077/140] [rocketv] migrate Rocket --- rocketv/src/RocketCore.scala | 1353 ++++++++++++++++++---------------- 1 
file changed, 737 insertions(+), 616 deletions(-) diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala index b1aa9294d..1dce3e850 100644 --- a/rocketv/src/RocketCore.scala +++ b/rocketv/src/RocketCore.scala @@ -1,136 +1,405 @@ -// See chipsalliance:rocket-chip LICENSE.Berkeley for license details. -// See chipsalliance:rocket-chip LICENSE.SiFive for license details. - -package org.chipsalliance.t1.rocketcore +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2012-2014 The Regents of the University of California +// SPDX-FileCopyrightText: 2016-2017 SiFive, Inc +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import chisel3.util._ +import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.circt.ClockGate import chisel3.util.experimental.decode.DecodeBundle -import freechips.rocketchip.tile.TileInterrupts -import freechips.rocketchip.util._ -import org.chipsalliance.cde.config.{Field, Parameters} -import org.chipsalliance.t1.rockettile.{VectorRequest, VectorResponse} +import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil} +import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes +import org.chipsalliance.rvdecoderdb.Instruction -import scala.collection.mutable.ArrayBuffer +object RocketParameter { + implicit def rwP: upickle.default.ReadWriter[RocketParameter] = upickle.default.macroRW[RocketParameter] +} -// TODO: remove it. 
-import freechips.rocketchip.rocket.{Causes, MulDivParams, RocketCoreParams} -import freechips.rocketchip.tile.{FPUCoreIO, HasCoreParameters} +case class RocketParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + vLen: Int, + usingUser: Boolean, + hartIdLen: Int, + nPMPs: Int, + asidBits: Int, + nBreakpoints: Int, + usingBTB: Boolean, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + mulDivLantency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + hasBeu: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + flushOnFenceI: Boolean + ) + extends SerializableModuleParameter { + + // fixed for now + def usingRVE = false + def usingDataScratchpad: Boolean = false + def hasDataECC: Boolean = false + def vmidBits = 0 + def nPerfCounters = 0 + + // calculated + def lgNXRegs = if (usingRVE) 4 else 5 + + def pipelinedMul: Boolean = usingMulDiv && mulUnroll == xLen + + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets. 
+ Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) -trait HasRocketCoreParameters extends HasCoreParameters { - lazy val rocketParams: RocketCoreParams = tileParams.core.asInstanceOf[RocketCoreParams] + def coreInstBytes = (if (usingCompressed) 16 else 32) / 8 - val fastLoadWord = rocketParams.fastLoadWord - val fastLoadByte = rocketParams.fastLoadByte + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) - val mulDivParams = rocketParams.mulDiv.getOrElse(MulDivParams()) // TODO ask andrew about this + private def hasInstruction(instName: String): Boolean = instructions.map(_.name).contains(instName) - val aluFn = new ALUFN + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } - require(!fastLoadByte || fastLoadWord) - require(!rocketParams.haveFSDirty, "rocket doesn't support setting fs dirty from outside, please disable haveFSDirty") - require(!usingConditionalZero, "Zicond is not yet implemented in ABLU") -} -class CoreInterrupts(val hasBeu: Boolean)(implicit p: Parameters) extends TileInterrupts()(p) { - val buserror = Option.when(hasBeu)(Bool()) + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) 
=> Some(64) + } + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def usingMulDiv = hasInstructionSet("rv_m") || hasInstructionSet("rv64_m") + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingVM = hasInstructionSet("sfence.vma") + + def usingSupervisor = hasInstruction("sret") + + // static to false for now + def usingHypervisor = hasInstructionSet("rv_h") || hasInstructionSet("rv64_h") + + def usingDebug = hasInstructionSet("rv_sdext") + + def usingCompressed = hasInstructionSet("rv_c") + + def usingFPU = fLen.isDefined + + // static to false for now + def haveCease = hasInstruction("cease") + + // static to false for now + def usingNMI = hasInstructionSet("rv_smrnmi") + + def usingVector = hasInstructionSet("rv_v") + + // calculated parameter + def fetchWidth: Int = if (usingCompressed) 2 else 1 + + def resetVectorLen: Int = { + val externalLen = paddrBits + require(externalLen <= xLen, s"External reset vector length ($externalLen) must be <= XLEN ($xLen)") + require(externalLen <= vaddrBitsExtended, s"External reset vector length ($externalLen) must be <= virtual address bit width ($vaddrBitsExtended)") + externalLen + } + + val nLocalInterrupts: Int = 0 + + def pgIdxBits: Int = 12 + def pgLevels: Int = if (xLen == 64) 3 /* Sv39 */ else 2 /* Sv32 */ + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def maxSVAddrBits: Int = pgIdxBits + pgLevels * pgLevelBits + def maxHypervisorExtraAddrBits: Int = 2 + def hypervisorExtraAddrBits: Int = if (usingHypervisor) maxHypervisorExtraAddrBits else 0 + def maxHVAddrBits: Int = maxSVAddrBits + hypervisorExtraAddrBits + def vaddrBits: Int = if (usingVM) { + val v = maxHVAddrBits + require(v == xLen || xLen > v && v > paddrBits) + v + } else { + // since virtual addresses sign-extend but physical addresses + // zero-extend, make room for a zero sign bit for physical 
addresses + (paddrBits + 1) min xLen + } + def vpnBits: Int = vaddrBits - pgIdxBits + def ppnBits: Int = paddrBits - pgIdxBits + def vpnBitsExtended: Int = vpnBits + (if (vaddrBits < xLen) (if (usingHypervisor) 1 else 0) + 1 else 0) + + def vaddrBitsExtended: Int = vpnBitsExtended + pgIdxBits + // btb entries + def btbEntries: Int = 28 + def bhtHistoryLength: Option[Int] = Some(8) + def bhtCounterLength: Option[Int] = Some(1) + def coreInstBits: Int = if (usingCompressed) 16 else 32 + def coreMaxAddrBits: Int = paddrBits max vaddrBitsExtended + def lgCacheBlockBytes: Int = log2Ceil(cacheBlockBytes) + def blockOffBits = lgCacheBlockBytes + // todo: 64 -> dcacheParan.nset + def idxBits: Int = log2Ceil(dcacheNSets) + // dCache untage bits + def untagBits: Int = blockOffBits + idxBits + def dcacheReqTagBits: Int = 6 + def dcacheArbPorts: Int = 1 + (if(usingVM) 1 else 0) + (if(usingDataScratchpad) 1 else 0) + def coreDataBits: Int = xLen max fLen.getOrElse(0) + def coreDataBytes: Int = coreDataBits / 8 + def separateUncachedResp: Boolean = false + def minPgLevels: Int = { + val res = xLen match { + case 32 => 2 + case 64 => 3 + } + require(pgLevels >= res) + res + } + + def maxPAddrBits: Int = { + require(xLen == 32 || xLen == 64, s"Only XLENs of 32 or 64 are supported, but got $xLen") + xLen match { case 32 => 34; case 64 => 56 } + } + + val csrParameter: CSRParameter = CSRParameter( + useAsyncReset: Boolean, + vLen: Int, + xLen: Int, + fLen.getOrElse(0): Int, + hartIdLen: Int, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + vmidBits: Int, + nPMPs: Int, + nPerfCounters: Int, + paddrBits: Int, + nBreakpoints: Int, + usingSupervisor: Boolean, + usingFPU: Boolean, + usingUser: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingAtomics: Boolean, + usingDebug: Boolean, + usingMulDiv: Boolean, + usingVector: Boolean + ) + val decoderParameter = DecoderParameter( + instructionSets, + pipelinedMul, + flushOnFenceI + ) + val iBufParameter: 
IBufParameter = IBufParameter( + useAsyncReset, + xLen, + usingCompressed, + vaddrBits, + btbEntries, + vaddrBitsExtended, + bhtHistoryLength, + bhtCounterLength, + fetchWidth + ) + val breakpointUnitParameter: BreakpointUnitParameter = BreakpointUnitParameter( + nBreakpoints, + xLen, + useBPWatch, + vaddrBits, + mcontextWidth, + scontextWidth + ) + val aluParameter: ALUParameter = ALUParameter(xLen) + val mulDivParameter: MulDivParameter = MulDivParameter( + useAsyncReset: Boolean, + mulDivLantency: Int, + xLen: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean) + val mulParameter: Option[PipelinedMultiplierParameter] = Option.when(usingMulDiv && mulUnroll == xLen)(PipelinedMultiplierParameter( + useAsyncReset: Boolean, + 2, + xLen: Int + )) } -class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters) - extends Module - with HasRocketCoreParameters { - // Checker - require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth) - require(!(coreParams.useRVE && coreParams.fpu.nonEmpty), "Can't select both RVE and floating-point") - require(!(coreParams.useRVE && coreParams.useHypervisor), "Can't select both RVE and Hypervisor") - - // Parameters - val pipelinedMul: Boolean = usingMulDiv && mulDivParams.mulUnroll == xLen - val decoder: InstructionDecoder = new org.chipsalliance.t1.rocketcore.InstructionDecoder( - org.chipsalliance.t1.rocketcore.InstructionDecoderParameter( - (org.chipsalliance.rvdecoderdb.instructions(org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)) ++ - org.chipsalliance.t1.rocketcore.CustomInstructions.rocketSet).filter { i => - i.instructionSets.map(_.name) match { - // I - case s if s.contains("rv_i") => true - case s if s.contains("rv32_i") => xLen == 32 - case s if s.contains("rv64_i") => xLen == 64 - // M - case s if s.contains("rv_m") => usingMulDiv - case s if s.contains("rv64_m") => (xLen == 64) && usingMulDiv - // A - 
case s if s.contains("rv_a") => usingAtomics - case s if s.contains("rv64_a") => (xLen == 64) && usingAtomics - // ZICSR - case s if s.contains("rv_zicsr") => true - // ZIFENCEI - case s if s.contains("rv_zifencei") => true - // F - case s if s.contains("rv_f") => !(fLen == 0) - case s if s.contains("rv64_f") => (xLen == 64) && !(fLen == 0) - // D - case s if s.contains("rv_d") => fLen == 64 - case s if s.contains("rv64_d") => (xLen == 64) && (fLen == 64) - // ZFH - case s if s.contains("rv_zfh") => minFLen == 16 - case s if s.contains("rv64_zfh") => (xLen == 64) && (minFLen == 16) - case s if s.contains("rv_d_zfh") => (fLen == 64) && (minFLen == 16) - - // Priv - case s if s.contains("rv_system") => true - // Supervisor - case s if s.contains("rv_s") => - i.name match { - // if support superviosr but don't support virtual memory, raise illinstr. - case s if s.contains("sfence.vma") => usingVM - case s if s.contains("sret") => usingSupervisor - } - case s if s.contains("rv_smrnmi") => usingNMI - // Hypervisor - case s if s.contains("rv_h") => usingHypervisor - case s if s.contains("rv64_h") => (xLen == 64) && usingHypervisor - // Debug - case s if s.contains("rv_sdext") => usingDebug - - // T1 Vector - case s if s.contains("rv_v") => usingVector - // unratified but supported. - case s if s.contains("rv_zicond") => usingConditionalZero - // custom - case s if s.contains("rv_rocket") => - i.name match { - case "c.flush.d.l1" => coreParams.haveCFlush - case "c.discard.d.l1" => coreParams.haveCFlush - case "cease" => rocketParams.haveCease - } - case _ => false - } - }.filter { - // special case for rv32 pseudo from rv64 - case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true - case i if i.pseudoFrom.isDefined => false - case _ => true - }.toSeq.distinct, - pipelinedMul, - flushOnFenceI +/** The Interface of [[Rocket]]. 
+ * The [[Rocket]] is the public + */ +class RocketInterface(parameter: RocketParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val interrupts = Flipped(new TileInterrupts(parameter.usingSupervisor, parameter.nLocalInterrupts, parameter.usingNMI, parameter.resetVectorLen)) + val buserror = Input(Bool()) + val imem = new FrontendIO( + parameter.vaddrBitsExtended, + parameter.vaddrBits, + parameter.asidBits, + parameter.btbEntries, + parameter.bhtHistoryLength, + parameter.bhtCounterLength, + parameter.coreInstBits, + parameter.fetchWidth + ) + + val dmem = new HellaCacheIO( + parameter.coreMaxAddrBits, + parameter.usingVM, + parameter.untagBits, + parameter.pgIdxBits, + parameter.dcacheReqTagBits, + parameter.dcacheArbPorts, + parameter.coreDataBytes, + parameter.paddrBits, + parameter.vaddrBitsExtended, + parameter.separateUncachedResp + ) + + val ptw = Flipped( + new DatapathPTWIO( + parameter.xLen, + parameter.maxPAddrBits, + parameter.pgIdxBits: Int, + parameter.vaddrBits: Int, + parameter.asidBits: Int, + parameter.nPMPs, + parameter.paddrBits: Int ) ) - val lgNXRegs: Int = if (coreParams.useRVE) 4 else 5 - val regAddrMask: Int = (1 << lgNXRegs) - 1 - - val hartid = IO(Input(UInt(hartIdLen.W))) - val interrupts = IO(Input(new CoreInterrupts(hasBeu))) - val imem = IO(new FrontendIO) - val dmem = IO(new HellaCacheIO) - val ptw = IO(Flipped(new DatapathPTWIO())) - val fpu = Option.when(usingFPU)(IO(Flipped(new FPUCoreIO()))) - val bpwatch = IO(Output(Vec(coreParams.nBreakpoints, new BPWatch(coreParams.retireWidth)))) - val cease = IO(Output(Bool())) - val wfi = IO(Output(Bool())) - val traceStall = IO(Input(Bool())) - val t1Request = Option.when(usingVector)(IO(Valid(new VectorRequest(xLen)))) - val t1Response = Option.when(usingVector)(IO(Flipped(Decoupled(new VectorResponse(xLen))))) - // logic for T1 - val t1IssueQueueRelease = 
Option.when(usingVector)(IO(Input(Bool()))) + val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen))) + val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) + val cease = Output(Bool()) + val wfi = Output(Bool()) + val traceStall = Input(Bool()) +} + +/** The [[Rocket]] is the next version of the RocketCore, + * All micro architectures are from the original RocketCore. + * The development of [[Rocket]] happens in the T1 project. + * It will be moved to the standalone pacakge until it get verified. + * + * Here are some basic idea of [[Rocket]], + * - it should be linkable by providing an verification constraint to other components. + * - open expose [[RocketParameter]] and [[RocketInterface]] to users, all internal API are subject to be changed. + * - There is no coherent support for the [[Rocket]] until chipsalliance having the CHI interconnect and cache IP. + * - The in-tile components contains Frontend, HellaCache, FPU, T1, but the memory subsystem only supports AXI. 
+ */ +@instantiable +class Rocket(val parameter: RocketParameter) + extends FixedIORawModule(new RocketInterface(parameter)) + with SerializableModule[RocketParameter] + with ImplicitClock + with ImplicitReset { + override protected def implicitClock: Clock = io.clock + override protected def implicitReset: Reset = io.reset + val csr: Instance[CSR] = Instantiate(new CSR(parameter.csrParameter)) + val decoder: Instance[Decoder] = Instantiate(new Decoder(parameter.decoderParameter)) + val instructionBuffer: Instance[IBuf] = Instantiate(new IBuf(parameter.iBufParameter)) + val breakpointUnit: Instance[BreakpointUnit] = Instantiate(new BreakpointUnit(parameter.breakpointUnitParameter)) + val alu: Instance[ALU] = Instantiate(new ALU(parameter.aluParameter)) + val mulDiv: Instance[MulDiv] = Instantiate(new MulDiv(parameter.mulDivParameter)) + val mul: Option[Instance[PipelinedMultiplier]] = parameter.mulParameter.map(p => Instantiate(new PipelinedMultiplier(p))) + + // compatibility mode. + object rocketParams { + def clockGate = parameter.clockGate + def lgPauseCycles = 5 + }; + def M_XRD = "b00000".U // int load + def M_XWR = "b00001".U // int store + def M_PFR = "b00010".U // prefetch with intent to read + def M_PFW = "b00011".U // prefetch with intent to write + def M_XA_SWAP = "b00100".U + def M_FLUSH_ALL = "b00101".U // flush all lines + def M_XLR = "b00110".U + def M_XSC = "b00111".U + def M_XA_ADD = "b01000".U + def M_XA_XOR = "b01001".U + def M_XA_OR = "b01010".U + def M_XA_AND = "b01011".U + def M_XA_MIN = "b01100".U + def M_XA_MAX = "b01101".U + def M_XA_MINU = "b01110".U + def M_XA_MAXU = "b01111".U + def M_PWR = "b10001".U // partial (masked) store + def M_SFENCE = "b10100".U // SFENCE.VMA + def M_HFENCEV = "b10101".U // HFENCE.VVMA + def M_HFENCEG = "b10110".U // HFENCE.GVMA + def M_WOK = "b10111".U // check write permissions but don't perform a write + def M_HLVX = "b10000".U // HLVX instruction + + def lgNXRegs = parameter.lgNXRegs + def coreDataBytes = 
parameter.coreDataBytes + def regAddrMask: Int = (1 << lgNXRegs) - 1 + def xLen: Int = parameter.xLen + def fLen: Option[Int] = parameter.fLen + def vaddrBits: Int = parameter.vaddrBits + def vaddrBitsExtended: Int = parameter.vaddrBitsExtended + def btbEntries: Int = parameter.btbEntries + def bhtHistoryLength: Option[Int] = parameter.bhtHistoryLength + def bhtCounterLength: Option[Int] = parameter.bhtCounterLength + def nBreakpoints: Int = parameter.nBreakpoints + def usingAtomics: Boolean = parameter.usingAtomics + def usingMulDiv: Boolean = parameter.usingMulDiv + def usingVector: Boolean = parameter.usingVector + def pipelinedMul: Boolean = parameter.pipelinedMul + def usingCompressed: Boolean = parameter.usingCompressed + def usingFPU: Boolean = parameter.usingFPU + def usingVM: Boolean = parameter.usingVM + def fastLoadByte: Boolean = parameter.fastLoadByte + def fastLoadWord: Boolean = parameter.fastLoadWord + def hypervisorExtraAddrBits: Int = parameter.hypervisorExtraAddrBits + def usingHypervisor: Boolean = parameter.usingHypervisor + def flushOnFenceI: Boolean = parameter.flushOnFenceI + def usingBTB: Boolean = parameter.usingBTB + def coreInstBytes: Int = parameter.coreInstBytes + def fetchWidth: Int = parameter.fetchWidth + def minFLen: Int = parameter.minFLen.getOrElse(0) + def hasDataECC: Boolean = parameter.hasDataECC // Signal outside from internal clock domain. 
@@ -140,115 +409,35 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val clockEnable = WireDefault(true.B) val clockEnableReg = RegInit(true.B) val gatedClock = - Option.when(rocketParams.clockGate)(ClockGate(clock, clockEnable, "rocket_clock_gate")).getOrElse(clock) + Option.when(rocketParams.clockGate)(ClockGate(io.clock, clockEnable)).getOrElse(io.clock) + + csr.io.clock := gatedClock + csr.io.reset := io.reset + instructionBuffer.io.clock := gatedClock + instructionBuffer.io.reset := io.reset + mulDiv.io.clock := gatedClock + mulDiv.io.reset := io.reset + mul.foreach(_.io.clock := gatedClock) + mul.foreach(_.io.reset := io.reset) // leaving gated-clock domain val gatedDomain = withClock(gatedClock)(new Gated) class Gated { // performance counters def pipelineIDToWB[T <: Data](x: T): T = RegEnable(RegEnable(RegEnable(x, !ctrlKilled), exPcValid), memPcValid) - // TODO: remove it and probe signal to verification modules - // format: off - val perfEvents: EventSets = new EventSets( - Seq( - new EventSet( - (mask, hits) => Mux(wbException, mask(0), wbValid && pipelineIDToWB((mask & hits).orR)), - Seq( - ("exception", () => false.B), - // TODO: why no FPU here? 
- ("load", () => idDecodeOutput(decoder.mem) && idDecodeOutput(decoder.memCommand) === M_XRD && !Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B)), - ("store", () => idDecodeOutput(decoder.mem) && idDecodeOutput(decoder.memCommand) === M_XWR && !Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B)), - ("system", () => idDecodeOutput(decoder.csr) =/= CSR.N), - ("arith", () => idDecodeOutput(decoder.wxd) && !( idDecodeOutput(decoder.isJal) || idDecodeOutput(decoder.isJalr) || idDecodeOutput(decoder.mem) || Option.when(usingFPU)(idDecodeOutput(decoder.fp)).getOrElse(false.B) || Option.when(usingMulDiv && pipelinedMul)(idDecodeOutput(decoder.mul)).getOrElse(false.B) || Option.when(usingMulDiv)(idDecodeOutput(decoder.div)).getOrElse(false.B) || idDecodeOutput(decoder.csr) =/= CSR.N )), - ("branch", () => idDecodeOutput(decoder.isBranch)), - ("jal", () => idDecodeOutput(decoder.isJal)), - ("jalr", () => idDecodeOutput(decoder.isJalr)) - ) ++ - Option.when(usingAtomics)(Seq( - ("amo", () => idDecodeOutput(decoder.mem) && (isAMO(idDecodeOutput(decoder.memCommand)) || idDecodeOutput(decoder.memCommand).isOneOf(M_XLR, M_XSC))) - )).getOrElse(Seq()) ++ - Option.when(usingMulDiv)(Seq( - ("mul", () => if (pipelinedMul) idDecodeOutput(decoder.mul) else idDecodeOutput(decoder.div) && (idDecodeOutput(decoder.aluFn) & aluFn.FN_DIV) =/= aluFn.FN_DIV), - ("div", () => if (pipelinedMul) idDecodeOutput(decoder.div) else idDecodeOutput(decoder.div) && (idDecodeOutput(decoder.aluFn) & aluFn.FN_DIV) === aluFn.FN_DIV) - )).getOrElse(Seq()) ++ - fpu.map(fpu => Seq( - ("fp load", () => idDecodeOutput(decoder.fp) && fpu.dec.ldst && fpu.dec.wen), - ("fp store", () => idDecodeOutput(decoder.fp) && fpu.dec.ldst && !fpu.dec.wen), - ("fp add", () => idDecodeOutput(decoder.fp) && fpu.dec.fma && fpu.dec.swap23), - ("fp mul", () => idDecodeOutput(decoder.fp) && fpu.dec.fma && !fpu.dec.swap23 && !fpu.dec.ren3), - ("fp mul-add", () => idDecodeOutput(decoder.fp) && 
fpu.dec.fma && fpu.dec.ren3), - ("fp div/sqrt", () => idDecodeOutput(decoder.fp) && (fpu.dec.div || fpu.dec.sqrt)), - ("fp other", () => idDecodeOutput(decoder.fp) && !(fpu.dec.ldst || fpu.dec.fma || fpu.dec.div || fpu.dec.sqrt )) - )).getOrElse(Seq()) - ), - new EventSet( - (mask, hits) => (mask & hits).orR, - Seq( - ("load-use interlock", () => idExHazard && exRegDecodeOutput(decoder.mem) || idMemHazard && memRegDecodeOutput(decoder.mem) || idWbHazard && wbRegDecodeOutput(decoder.mem) ), - ("long-latency interlock", () => idScoreboardHazard), - ("csr interlock", () => idExHazard && exRegDecodeOutput(decoder.csr) =/= CSR.N || idMemHazard && memRegDecodeOutput(decoder.csr) =/= CSR.N || idWbHazard && wbRegDecodeOutput(decoder.csr) =/= CSR.N), - ("I$ blocked", () => icacheBlocked), - ("D$ blocked", () => idDecodeOutput(decoder.mem) && dcacheBlocked), - ("branch misprediction", () => takePcMem && memDirectionMisprediction), - ("control-flow target misprediction", () => takePcMem && memMisprediction && memCfi && !memDirectionMisprediction && !icacheBlocked), - ("flush", () => wbRegFlushPipe), - ("replay", () => replayWb) - ) ++ - Option.when(usingMulDiv)(Seq( - ("mul/div interlock", () => idExHazard && (Option.when(pipelinedMul)(exRegDecodeOutput(decoder.mul)).getOrElse(false.B) || exRegDecodeOutput(decoder.div)) || idMemHazard && (Option.when(pipelinedMul)(memRegDecodeOutput(decoder.mul)).getOrElse(false.B) || memRegDecodeOutput(decoder.div)) || idWbHazard && wbRegDecodeOutput(decoder.div)) - )).getOrElse(Seq()) ++ - Option.when(usingFPU)(Seq( - ("fp interlock", () => idExHazard && exRegDecodeOutput(decoder.fp) || idMemHazard && memRegDecodeOutput(decoder.fp) || idWbHazard && wbRegDecodeOutput(decoder.fp) || idDecodeOutput(decoder.fp) && idStallFpu) - )).getOrElse(Seq()) - ), - new EventSet( - (mask, hits) => (mask & hits).orR, - Seq( - ("I$ miss", () => imem.perf.acquire), - ("D$ miss", () => dmem.perf.acquire), - ("D$ release", () =>dmem.perf.release), - ("ITLB 
miss", () => imem.perf.tlbMiss), - ("DTLB miss", () => dmem.perf.tlbMiss), - ("L2 TLB miss", () => ptw.perf.l2miss) - ) - ) - ) - ) - // format: on - - // Start RTL Here - // instantiate modules - // TODO: remove implicit parameter for them. - - val csr: CSRFile = Module(new CSRFile(perfEvents, coreParams.customCSRs.decls, hasBeu)) - - // TODO: move to Parameter Level or LazyModule level. - /** Decoder instantiated, input from IF, output to ID. */ - val decoderModule = Module(new RawModule { - override def desiredName: String = "RocketDecoder" - val instruction = IO(Input(UInt(32.W))) - val output = IO(Output(decoder.table.bundle)) - output := decoder.table.decode(instruction) - }) - val instructionBuffer: IBuf = Module(new IBuf) - val breakpointUnit: BreakpointUnit = Module(new BreakpointUnit(nBreakpoints)) - val arithmeticLogicUnit: ALU = Module(new ALU()) - val muldiv = Module( - new MulDiv(if (pipelinedMul) mulDivParams.copy(mulUnroll = 0) else mulDivParams, width = xLen, aluFn = aluFn) - ).suggestName(if (pipelinedMul) "div" else "muldiv") - val mul = pipelinedMul.option(Module(new PipelinedMultiplier(xLen, 2, aluFn = aluFn))) + // RF is not a Module. val rf = new RegFile(regAddrMask, xLen) // wire definations. 
- val idDecodeOutput: DecodeBundle = Wire(decoder.table.bundle) + val idDecodeOutput: DecodeBundle = Wire(chiselTypeOf(decoder.io.output)) val exRegExceptionInterrupt: Bool = Reg(Bool()) val exRegException: Bool = Reg(Bool()) val exRegValid: Bool = Reg(Bool()) val exRegRVC: Bool = Reg(Bool()) - val exRegBTBResponse: BTBResp = Reg(new BTBResp) + val exRegBTBResponse: BTBResp = Reg(new BTBResp(vaddrBits, btbEntries, fetchWidth, bhtHistoryLength, bhtCounterLength)) val exRegFlushPipe: Bool = Reg(Bool()) val exRegLoadUse: Bool = Reg(Bool()) val exRegCause: UInt = Reg(UInt()) @@ -258,16 +447,22 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val exRegMemSize: UInt = Reg(UInt()) // Option.when(usingHypervisor) val exRegHLS: Bool = Reg(Bool()) - val exRegInstruction: UInt = Reg(Bits()) + val exRegInstruction: UInt = Reg(UInt()) val exRegRawInstruction: UInt = Reg(UInt()) // TODO: what's this? val exRegWphit: Vec[Bool] = Reg(Vec(nBreakpoints, Bool())) - val exRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + val exRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) val memRegExceptionInterrupt = Reg(Bool()) val memRegValid = Reg(Bool()) val memRegRVC = Reg(Bool()) - val memRegBTBResponse = Reg(new BTBResp) + val memRegBTBResponse = Reg(new BTBResp( + vaddrBits, + btbEntries, + fetchWidth, + bhtHistoryLength, + bhtCounterLength + )) val memRegException = Reg(Bool()) val memRegReplay = Reg(Bool()) val memRegFlushPipe = Reg(Bool()) @@ -277,15 +472,15 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val memRegStore = Reg(Bool()) val memRegSfence = Reg(Bool()) val memRegPc = Reg(UInt()) - val memRegInstruction = Reg(Bits()) + val memRegInstruction = Reg(UInt()) val memRegMemSize = Reg(UInt()) - val memRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + val memRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) /** virtualization mode? 
*/ val memRegHlsOrDv = Reg(Bool()) val memRegRawInstruction = Reg(UInt()) - val memRegWdata = Reg(Bits()) - val memRegRS2 = Reg(Bits()) + val memRegWdata = Reg(UInt()) + val memRegRS2 = Reg(UInt()) val memBranchTaken = Reg(Bool()) val takePcMem = Wire(Bool()) val memRegWphit = Reg(Vec(nBreakpoints, Bool())) @@ -297,15 +492,15 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val wbRegCause = Reg(UInt()) val wbRegSfence = Reg(Bool()) val wbRegPc = Reg(UInt()) - val wbRegDecodeOutput: DecodeBundle = Reg(decoder.table.bundle) + val wbRegDecodeOutput: DecodeBundle = Reg(chiselTypeOf(decoder.io.output)) val wbRegMemSize = Reg(UInt()) val wbRegHlsOrDv = Reg(Bool()) val wbRegHfenceV = Reg(Bool()) val wbRegHfenceG = Reg(Bool()) - val wbRegInstruction = Reg(Bits()) + val wbRegInstruction = Reg(UInt()) val wbRegRawInstruction = Reg(UInt()) - val wbRegWdata = Reg(Bits()) - val wbRegRS2 = Reg(Bits()) + val wbRegWdata = Reg(UInt()) + val wbRegRS2 = Reg(UInt()) val wbRegWphit = Reg(Vec(nBreakpoints, Bool())) val takePcWb = Wire(Bool()) @@ -313,19 +508,19 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val takePc = takePcMemWb // From IBUF to ID - instructionBuffer.io.imem <> imem.resp - val instructionBufferOut: DecoupledIO[Instruction] = instructionBuffer.io.inst.head + instructionBuffer.io.imem <> io.imem.resp + val instructionBufferOut = instructionBuffer.io.inst.head // TODO: does these really has its meaning? I don't think so:( val idExpandedInstruction: ExpandedInstruction = instructionBufferOut.bits.inst val idRawInstruction: UInt = instructionBufferOut.bits.raw val idInstruction: UInt = idExpandedInstruction.bits - idDecodeOutput := decoderModule.output + idDecodeOutput := decoder.io.output instructionBuffer.io.kill := takePc // 5. 
Instruction goes to Rocket Decoder - decoderModule.instruction := idInstruction + decoder.io.instruction := idInstruction // Optional circuit: Optional add this circuit for RVE. - def decodeReg(x: UInt): (Bool, UInt) = (x.extract(x.getWidth - 1, lgNXRegs).asBool, x(lgNXRegs - 1, 0)) + def decodeReg(x: UInt): (Bool, UInt) = ((if (x.getWidth - 1 < lgNXRegs) 0.U else x(x.getWidth - 1, lgNXRegs)).asBool, x(lgNXRegs - 1, 0)) val (idRaddr3Illegal: Bool, idRaddr3: UInt) = decodeReg(idExpandedInstruction.rs3) val (idRaddr2Illegal: Bool, idRaddr2: UInt) = decodeReg(idExpandedInstruction.rs2) val (idRaddr1Illegal: Bool, idRaddr1: UInt) = decodeReg(idExpandedInstruction.rs1) @@ -334,8 +529,8 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val idLoadUse: Bool = Wire(Bool()) val idRegFence: Bool = RegInit(false.B) // TODO: T1 needs to access RS1 and RS2 under some instructions. - // FP goes to a different path, decoder.rfs1 is never used... - val idRen: Seq[Bool] = IndexedSeq(idDecodeOutput(decoder.rxs1), idDecodeOutput(decoder.rxs2)) + // FP goes to a different path, parameter.decoderParameter.rfs1 is never used... + val idRen: Seq[Bool] = IndexedSeq(idDecodeOutput(parameter.decoderParameter.rxs1), idDecodeOutput(parameter.decoderParameter.rxs2)) val idRaddr: Seq[UInt] = IndexedSeq(idRaddr1, idRaddr2) // 6. Read RF out. val idRs: Seq[UInt] = idRaddr.map(rf.read) @@ -343,41 +538,44 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val ctrlKilled: Bool = Wire(Bool()) // TODO: additional decode out? 
- val idCsrEn: Bool = idDecodeOutput(decoder.csr).isOneOf(CSR.S, CSR.C, CSR.W) - val idSystemInstruction: Bool = idDecodeOutput(decoder.csr) === CSR.I - val idCsrRen: Bool = idDecodeOutput(decoder.csr).isOneOf(CSR.S, CSR.C) && idExpandedInstruction.rs1 === 0.U + + def isOneOf(x:UInt, s: Seq[UInt]): Bool = VecInit(s.map(x === _)).asUInt.orR + + val idCsrEn: Bool = isOneOf(idDecodeOutput(parameter.decoderParameter.csr), Seq(parameter.csrParameter.S, parameter.csrParameter.C, parameter.csrParameter.W)) + val idSystemInstruction: Bool = idDecodeOutput(parameter.decoderParameter.csr) === parameter.csrParameter.I + val idCsrRen: Bool = isOneOf(idDecodeOutput(parameter.decoderParameter.csr), Seq(parameter.csrParameter.S, parameter.csrParameter.C)) && idExpandedInstruction.rs1 === 0.U val idCsr = - Mux(idSystemInstruction && idDecodeOutput(decoder.mem), CSR.N, Mux(idCsrRen, CSR.R, idDecodeOutput(decoder.csr))) + Mux(idSystemInstruction && idDecodeOutput(parameter.decoderParameter.mem), parameter.csrParameter.N, Mux(idCsrRen, parameter.csrParameter.R, idDecodeOutput(parameter.decoderParameter.csr))) val idCsrFlush = idSystemInstruction || (idCsrEn && !idCsrRen && csr.io.decode(0).writeFlush) || - Option.when(usingVector)(idDecodeOutput(decoder.vectorCSR)).getOrElse(false.B) + Option.when(parameter.usingVector)(idDecodeOutput(parameter.decoderParameter.vectorCSR)).getOrElse(false.B) val idRfIllegal: Bool = - idRaddr2Illegal && idDecodeOutput(decoder.rxs2) || - idRaddr1Illegal && idDecodeOutput(decoder.rxs1) || - idWaddrIllegal && idDecodeOutput(decoder.wxd) + idRaddr2Illegal && idDecodeOutput(parameter.decoderParameter.rxs2) || + idRaddr1Illegal && idDecodeOutput(parameter.decoderParameter.rxs1) || + idWaddrIllegal && idDecodeOutput(parameter.decoderParameter.wxd) val idCsrIllegalRW: Bool = idCsrEn && (csr.io.decode(0).readIllegal || !idCsrRen && csr.io.decode(0).writeIllegal) val idSystemIllegal: Bool = !instructionBufferOut.bits.rvc && (idSystemInstruction && 
csr.io.decode(0).systemIllegal) val idAtomicIllegal: Option[Bool] = - Option.when(usingAtomics)(idDecodeOutput(decoder.amo) && !csr.io.status.isa('a' - 'a')) + Option.when(usingAtomics)(idDecodeOutput(parameter.decoderParameter.amo) && !csr.io.status.isa('a' - 'a')) val idMulDivIllegal: Option[Bool] = Option.when(usingMulDiv)( - Option.when(pipelinedMul)(idDecodeOutput(decoder.mul)).getOrElse(false.B) || - idDecodeOutput(decoder.div) && !csr.io.status.isa('m' - 'a') + Option.when(pipelinedMul)(idDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + idDecodeOutput(parameter.decoderParameter.div) && !csr.io.status.isa('m' - 'a') ) val idCompressIllegal: Option[Bool] = Option.when(usingCompressed)(instructionBufferOut.bits.rvc && !csr.io.status.isa('c' - 'a')) val idFpIllegal: Option[Bool] = - fpu.map(fpu => idDecodeOutput(decoder.fp) && (csr.io.decode(0).fpIllegal || fpu.illegal_rm)) - val idDpIllegal: Option[Bool] = Option.when(usingFPU)(idDecodeOutput(decoder.dp) && !csr.io.status.isa('d' - 'a')) + io.fpu.map(fpu => idDecodeOutput(parameter.decoderParameter.fp) && (csr.io.decode(0).fpIllegal || fpu.illegal_rm)) + val idDpIllegal: Option[Bool] = Option.when(usingFPU)(idDecodeOutput(parameter.decoderParameter.dp) && !csr.io.status.isa('d' - 'a')) // TODO: vector illegal: // - vector is not enabled but a vector instruction is decoded. 
val idIllegalInstruction: Bool = - !idDecodeOutput(decoder.isLegal) || + !idDecodeOutput(parameter.decoderParameter.isLegal) || idRfIllegal || idCsrIllegalRW || idSystemIllegal || @@ -387,7 +585,7 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters idDpIllegal.getOrElse(false.B) || idCompressIllegal.getOrElse(false.B) val idVirtualInstruction: Bool = - idDecodeOutput(decoder.isLegal) && + idDecodeOutput(parameter.decoderParameter.isLegal) && ( (idCsrEn && !(!idCsrRen && csr.io.decode(0).writeIllegal) && @@ -395,21 +593,21 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters !instructionBufferOut.bits.rvc && idSystemInstruction && csr.io.decode(0).virtualSystemIllegal + ) ) - ) // stall decode for fences (now, for AMO.rl; later, for AMO.aq and FENCE) val idAmoAquire: Bool = idInstruction(26) val idAmoRelease: Bool = idInstruction(25) // TODO: what's this? val idFenceSucc: UInt = idInstruction(23, 20) - val idFenceNext: Bool = idDecodeOutput(decoder.fence) || idDecodeOutput(decoder.amo) && idAmoAquire - val idMemoryBusy: Bool = !dmem.ordered || dmem.req.valid + val idFenceNext: Bool = idDecodeOutput(parameter.decoderParameter.fence) || idDecodeOutput(parameter.decoderParameter.amo) && idAmoAquire + val idMemoryBusy: Bool = !io.dmem.ordered || io.dmem.req.valid val idDoFence = idMemoryBusy && - (idDecodeOutput(decoder.amo) && idAmoRelease || - idDecodeOutput(decoder.fenceI) || - idRegFence && idDecodeOutput(decoder.mem)) + (idDecodeOutput(parameter.decoderParameter.amo) && idAmoRelease || + idDecodeOutput(parameter.decoderParameter.fenceI) || + idRegFence && idDecodeOutput(parameter.decoderParameter.mem)) // TODO: if vector is non-empty, don't take breakpoint. 
breakpointUnit.io.status := csr.io.status @@ -424,32 +622,30 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val (idException, idCause) = checkExceptions( List( (csr.io.interrupt, csr.io.interruptCause), - (breakpointUnit.io.debug_if, CSR.debugTriggerCause.U), + (breakpointUnit.io.debug_if, parameter.csrParameter.debugTriggerCause.U), (breakpointUnit.io.xcpt_if, Causes.breakpoint.U), - (idException0.pf.inst, Causes.fetch_page_fault.U), - (idException0.gf.inst, Causes.fetch_guest_page_fault.U), - (idException0.ae.inst, Causes.fetch_access.U), - (idException1.pf.inst, Causes.fetch_page_fault.U), - (idException1.gf.inst, Causes.fetch_guest_page_fault.U), - (idException1.ae.inst, Causes.fetch_access.U), + (idException0.pf, Causes.fetch_page_fault.U), + (idException0.gf, Causes.fetch_guest_page_fault.U), + (idException0.ae, Causes.fetch_access.U), + (idException1.pf, Causes.fetch_page_fault.U), + (idException1.gf, Causes.fetch_guest_page_fault.U), + (idException1.ae, Causes.fetch_access.U), (idVirtualInstruction, Causes.virtual_instruction.U), (idIllegalInstruction, Causes.illegal_instruction.U) ) ) val idCoverCauses: Seq[(Int, String)] = List( - (CSR.debugTriggerCause, "DEBUG_TRIGGER"), + (parameter.csrParameter.debugTriggerCause, "DEBUG_TRIGGER"), (Causes.breakpoint, "BREAKPOINT"), (Causes.fetch_access, "FETCH_ACCESS"), (Causes.illegal_instruction, "ILLEGAL_INSTRUCTION") ) ++ Option.when(usingVM)((Causes.fetch_page_fault, "FETCH_PAGE_FAULT")) - // TODO: move it to verification module. 
- coverExceptions(idException, idCause, "DECODE", idCoverCauses) // Bypass signals val dcacheBypassData: UInt = - if (fastLoadByte) dmem.resp.bits.data(xLen - 1, 0) - else if (fastLoadWord) dmem.resp.bits.data_word_bypass(xLen - 1, 0) + if (fastLoadByte) io.dmem.resp.bits.data(xLen - 1, 0) + else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass(xLen - 1, 0) else wbRegWdata // detect bypass opportunities val exWaddr: UInt = exRegInstruction(11, 7) & regAddrMask.U @@ -457,43 +653,52 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val wbWaddr: UInt = wbRegInstruction(11, 7) & regAddrMask.U val bypassSources: Seq[(Bool, UInt, UInt)] = IndexedSeq( (true.B, 0.U, 0.U), // treat reading x0 as a bypass - (exRegValid && exRegDecodeOutput(decoder.wxd), exWaddr, memRegWdata), - (memRegValid && memRegDecodeOutput(decoder.wxd) && !memRegDecodeOutput(decoder.mem), memWaddr, wbRegWdata), - (memRegValid && memRegDecodeOutput(decoder.wxd), memWaddr, dcacheBypassData) + (exRegValid && exRegDecodeOutput(parameter.decoderParameter.wxd), exWaddr, memRegWdata), + (memRegValid && memRegDecodeOutput(parameter.decoderParameter.wxd) && !memRegDecodeOutput(parameter.decoderParameter.mem), memWaddr, wbRegWdata), + (memRegValid && memRegDecodeOutput(parameter.decoderParameter.wxd), memWaddr, dcacheBypassData) ) val idBypassSources: Seq[Seq[Bool]] = idRaddr.map(raddr => bypassSources.map(s => s._1 && s._2 === raddr)) // execute stage - val bypassMux: Seq[UInt] = bypassSources.map(_._3) + val bypassMux: Vec[UInt] = VecInit(bypassSources.map(_._3)) val exRegRsBypass: Vec[Bool] = Reg(Vec(idRaddr.size, Bool())) val exRegRsLSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt(log2Ceil(bypassSources.size).W))) val exRegRsMSB: Vec[UInt] = Reg(Vec(idRaddr.size, UInt())) val exRs: Seq[UInt] = Seq.tabulate(idRaddr.size)(i => Mux(exRegRsBypass(i), bypassMux(exRegRsLSB(i)), Cat(exRegRsMSB(i), exRegRsLSB(i))) ) - val exImm: SInt = ImmGen(exRegDecodeOutput(decoder.selImm), 
exRegInstruction) + val exImm: SInt = ImmGen(exRegDecodeOutput(parameter.decoderParameter.selImm), exRegInstruction) + + def A1_RS1 = 1.U(2.W) + def A1_PC = 2.U(2.W) + + def A2_ZERO = 0.U(2.W) + def A2_SIZE = 1.U(2.W) + def A2_RS2 = 2.U(2.W) + def A2_IMM = 3.U(2.W) + val exOp1: SInt = - MuxLookup(exRegDecodeOutput(decoder.selAlu1), 0.S)(Seq(A1_RS1 -> exRs(0).asSInt, A1_PC -> exRegPC.asSInt)) - val exOp2: SInt = MuxLookup(exRegDecodeOutput(decoder.selAlu2), 0.S)( + MuxLookup(exRegDecodeOutput(parameter.decoderParameter.selAlu1), 0.S)(Seq(A1_RS1 -> exRs(0).asSInt, A1_PC -> exRegPC.asSInt)) + val exOp2: SInt = MuxLookup(exRegDecodeOutput(parameter.decoderParameter.selAlu2), 0.S)( Seq(A2_RS2 -> exRs(1).asSInt, A2_IMM -> exImm, A2_SIZE -> Mux(exRegRVC, 2.S, 4.S)) ) - arithmeticLogicUnit.io.dw := exRegDecodeOutput(decoder.aluDoubleWords) - arithmeticLogicUnit.io.fn := exRegDecodeOutput(decoder.aluFn) - arithmeticLogicUnit.io.in2 := exOp2.asUInt - arithmeticLogicUnit.io.in1 := exOp1.asUInt + alu.io.dw := exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) + alu.io.fn := exRegDecodeOutput(parameter.decoderParameter.aluFn) + alu.io.in2 := exOp2.asUInt + alu.io.in1 := exOp1.asUInt // multiplier and divider // TODO: waive them if !usingMulDiv - muldiv.io.req.valid := exRegValid && Option.when(usingMulDiv)(exRegDecodeOutput(decoder.div)).getOrElse(false.B) - muldiv.io.req.bits.dw := exRegDecodeOutput(decoder.aluDoubleWords) - muldiv.io.req.bits.fn := exRegDecodeOutput(decoder.aluFn) - muldiv.io.req.bits.in1 := exRs(0) - muldiv.io.req.bits.in2 := exRs(1) - muldiv.io.req.bits.tag := exWaddr + mulDiv.io.req.valid := exRegValid && Option.when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) + mulDiv.io.req.bits.dw := exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) + mulDiv.io.req.bits.fn := exRegDecodeOutput(parameter.decoderParameter.aluFn) + mulDiv.io.req.bits.in1 := exRs(0) + mulDiv.io.req.bits.in2 := exRs(1) + 
mulDiv.io.req.bits.tag := exWaddr mul.foreach { m => - m.io.req.valid := exRegValid && exRegDecodeOutput(decoder.mul) - m.io.req.bits := muldiv.io.req.bits + m.io.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mul) + m.io.req.bits := mulDiv.io.req.bits } exRegValid := !ctrlKilled @@ -505,40 +710,41 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters when(!ctrlKilled) { exRegDecodeOutput := idDecodeOutput exRegRVC := instructionBufferOut.bits.rvc - exRegDecodeOutput(decoder.csr) := idCsr - when(idDecodeOutput(decoder.fence) && idFenceSucc === 0.U) { idRegPause := true.B } + exRegDecodeOutput(parameter.decoderParameter.csr) := idCsr + when(idDecodeOutput(parameter.decoderParameter.fence) && idFenceSucc === 0.U) { idRegPause := true.B } when(idFenceNext) { idRegFence := true.B } when(idException) { // pass PC down ALU writeback pipeline for badaddr - exRegDecodeOutput(decoder.aluFn) := aluFn.FN_ADD - exRegDecodeOutput(decoder.aluDoubleWords) := DW_XPR - exRegDecodeOutput(decoder.selAlu1) := A1_RS1 // badaddr := instruction - exRegDecodeOutput(decoder.selAlu2) := A2_ZERO + exRegDecodeOutput(parameter.decoderParameter.aluFn) := parameter.aluParameter.FN_ADD + exRegDecodeOutput(parameter.decoderParameter.aluDoubleWords) := true.B + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_RS1 // badaddr := instruction + exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_ZERO when(idException1.asUInt.orR) { // badaddr := PC+2 - exRegDecodeOutput(decoder.selAlu1) := A1_PC - exRegDecodeOutput(decoder.selAlu2) := A2_SIZE + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_PC + exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_SIZE exRegRVC := true.B } when(breakpointUnit.io.xcpt_if || idException0.asUInt.orR) { // badaddr := PC - exRegDecodeOutput(decoder.selAlu1) := A1_PC - exRegDecodeOutput(decoder.selAlu2) := A2_ZERO + exRegDecodeOutput(parameter.decoderParameter.selAlu1) := A1_PC + 
exRegDecodeOutput(parameter.decoderParameter.selAlu2) := A2_ZERO } } - exRegFlushPipe := idDecodeOutput(decoder.fenceI) || idCsrFlush + exRegFlushPipe := idDecodeOutput(parameter.decoderParameter.fenceI) || idCsrFlush exRegLoadUse := idLoadUse + exRegHLS := usingHypervisor.B && - idSystemInstruction && - idDecodeOutput(decoder.memCommand).isOneOf(M_XRD, M_XWR, M_HLVX) + idSystemInstruction && + isOneOf(idDecodeOutput(parameter.decoderParameter.memCommand), Seq(M_XRD, M_XWR, M_HLVX)) exRegMemSize := Mux(usingHypervisor.B && idSystemInstruction, idInstruction(27, 26), idInstruction(13, 12)) - when(idDecodeOutput(decoder.memCommand).isOneOf(M_SFENCE, M_HFENCEV, M_HFENCEG, M_FLUSH_ALL)) { + when(isOneOf(idDecodeOutput(parameter.decoderParameter.memCommand), Seq(M_SFENCE, M_HFENCEV, M_HFENCEG, M_FLUSH_ALL)) ) { exRegMemSize := Cat(idRaddr2 =/= 0.U, idRaddr1 =/= 0.U) } - when(idDecodeOutput(decoder.memCommand) === M_SFENCE && csr.io.status.v) { - exRegDecodeOutput(decoder.memCommand) := M_HFENCEV + when(idDecodeOutput(parameter.decoderParameter.memCommand) === M_SFENCE && csr.io.status.v) { + exRegDecodeOutput(parameter.decoderParameter.memCommand) := M_HFENCEV } if (flushOnFenceI) { - when(idDecodeOutput(decoder.fenceI)) { + when(idDecodeOutput(parameter.decoderParameter.fenceI)) { exRegMemSize := 0.U } } @@ -571,38 +777,38 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters } // replay inst in ex stage? 
val exPcValid: Bool = exRegValid || exRegReplay || exRegExceptionInterrupt - val wbDcacheMiss: Bool = wbRegDecodeOutput(decoder.mem) && !dmem.resp.valid - val replayExStructural: Bool = exRegDecodeOutput(decoder.mem) && !dmem.req.ready || Option - .when(usingMulDiv)(exRegDecodeOutput(decoder.div)) - .getOrElse(false.B) && !muldiv.io.req.ready + val wbDcacheMiss: Bool = wbRegDecodeOutput(parameter.decoderParameter.mem) && !io.dmem.resp.valid + val replayExStructural: Bool = exRegDecodeOutput(parameter.decoderParameter.mem) && !io.dmem.req.ready || Option + .when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)) + .getOrElse(false.B) && !mulDiv.io.req.ready val replayExLoadUse: Bool = wbDcacheMiss && exRegLoadUse val replayEx: Bool = exRegReplay || (exRegValid && (replayExStructural || replayExLoadUse)) val ctrlKillx: Bool = takePcMemWb || replayEx || !exRegValid // detect 2-cycle load-use delay for LB/LH/SC - val exSlowBypass: Bool = exRegDecodeOutput(decoder.memCommand) === M_XSC || exRegMemSize < 2.U + val exSlowBypass: Bool = exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_XSC || exRegMemSize < 2.U val exSfence: Bool = usingVM.B && - exRegDecodeOutput(decoder.mem) && - (exRegDecodeOutput(decoder.memCommand) === M_SFENCE || - exRegDecodeOutput(decoder.memCommand) === M_HFENCEV || - exRegDecodeOutput(decoder.memCommand) === M_HFENCEG) + exRegDecodeOutput(parameter.decoderParameter.mem) && + (exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_SFENCE || + exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEV || + exRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEG) val (exException: Bool, exCause: UInt) = checkExceptions( List((exRegExceptionInterrupt || exRegException, exRegCause)) ) val exCoverCauses: Seq[(Int, String)] = idCoverCauses - coverExceptions(exException, exCause, "EXECUTE", exCoverCauses) +// coverExceptions(exException, exCause, "EXECUTE", exCoverCauses) // memory stage val 
memPcValid: Bool = memRegValid || memRegReplay || memRegExceptionInterrupt val memBranchTarget: SInt = memRegPc.asSInt + Mux( - memRegDecodeOutput(decoder.isBranch) && memBranchTaken, - ImmGen(IMM_SB, memRegInstruction), - Mux(memRegDecodeOutput(decoder.isJal), ImmGen(IMM_UJ, memRegInstruction), Mux(memRegRVC, 2.S, 4.S)) + memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken, + ImmGen(ImmGen.IMM_SB, memRegInstruction), + Mux(memRegDecodeOutput(parameter.decoderParameter.isJal), ImmGen(ImmGen.IMM_UJ, memRegInstruction), Mux(memRegRVC, 2.S, 4.S)) ) val memNextPC: UInt = (Mux( - memRegDecodeOutput(decoder.isJalr) || memRegSfence, + memRegDecodeOutput(parameter.decoderParameter.isJalr) || memRegSfence, encodeVirtualAddress(memRegWdata, memRegWdata).asSInt, memBranchTarget ) & (-2).S).asUInt @@ -618,18 +824,18 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters ) val memNpcMisaligned: Bool = !csr.io.status.isa('c' - 'a') && memNextPC(1) && !memRegSfence val memIntWdata: UInt = Mux( - !memRegException && (memRegDecodeOutput(decoder.isJalr) ^ memNpcMisaligned), + !memRegException && (memRegDecodeOutput(parameter.decoderParameter.isJalr) ^ memNpcMisaligned), memBranchTarget, memRegWdata.asSInt ).asUInt val memCfi: Bool = - memRegDecodeOutput(decoder.isBranch) || memRegDecodeOutput(decoder.isJalr) || memRegDecodeOutput(decoder.isJal) + memRegDecodeOutput(parameter.decoderParameter.isBranch) || memRegDecodeOutput(parameter.decoderParameter.isJalr) || memRegDecodeOutput(parameter.decoderParameter.isJal) val memCfiTaken: Bool = - (memRegDecodeOutput(decoder.isBranch) && memBranchTaken) || memRegDecodeOutput( - decoder.isJalr - ) || memRegDecodeOutput(decoder.isJal) + (memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken) || memRegDecodeOutput( + parameter.decoderParameter.isJalr + ) || memRegDecodeOutput(parameter.decoderParameter.isJal) val memDirectionMisprediction: Bool = - 
memRegDecodeOutput(decoder.isBranch) && memBranchTaken =/= (usingBTB.B && memRegBTBResponse.taken) + memRegDecodeOutput(parameter.decoderParameter.isBranch) && memBranchTaken =/= (usingBTB.B && memRegBTBResponse.taken) val memMisprediction: Bool = if (usingBTB) memWrongNpc else memCfiTaken takePcMem := memRegValid && !memRegException && (memMisprediction || memRegSfence) @@ -645,8 +851,15 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters }.elsewhen(exPcValid) { memRegDecodeOutput := exRegDecodeOutput memRegRVC := exRegRVC - memRegLoad := exRegDecodeOutput(decoder.mem) && isRead(exRegDecodeOutput(decoder.memCommand)) - memRegStore := exRegDecodeOutput(decoder.mem) && isWrite(exRegDecodeOutput(decoder.memCommand)) + + def isAMOLogical(cmd: UInt) = isOneOf(cmd, Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)) + def isAMOArithmetic(cmd: UInt) = isOneOf(cmd, Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)) + def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) + def isRead(cmd: UInt) = isOneOf(cmd, Seq(M_XRD, M_HLVX, M_XLR, M_XSC)) || isAMO(cmd) + def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd) + + memRegLoad := exRegDecodeOutput(parameter.decoderParameter.mem) && isRead(exRegDecodeOutput(parameter.decoderParameter.memCommand)) + memRegStore := exRegDecodeOutput(parameter.decoderParameter.mem) && isWrite(exRegDecodeOutput(parameter.decoderParameter.memCommand)) memRegSfence := exSfence memRegBTBResponse := exRegBTBResponse memRegFlushPipe := exRegFlushPipe @@ -657,24 +870,24 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters memRegInstruction := exRegInstruction memRegRawInstruction := exRegRawInstruction memRegMemSize := exRegMemSize - memRegHlsOrDv := dmem.req.bits.dv + memRegHlsOrDv := io.dmem.req.bits.dv memRegPc := exRegPC // IDecode ensured they are 1H - memRegWdata := arithmeticLogicUnit.io.out - memBranchTaken := 
arithmeticLogicUnit.io.cmp_out + memRegWdata := alu.io.out + memBranchTaken := alu.io.cmp_out when( - exRegDecodeOutput(decoder.rxs2) && (exRegDecodeOutput(decoder.mem) || exSfence) + exRegDecodeOutput(parameter.decoderParameter.rxs2) && (exRegDecodeOutput(parameter.decoderParameter.mem) || exSfence) ) { val size = exRegMemSize memRegRS2 := new StoreGen(size, 0.U, exRs(1), coreDataBytes).data - }.elsewhen(exRegDecodeOutput(decoder.rxs2) && exRegDecodeOutput(decoder.vector)) { + }.elsewhen(exRegDecodeOutput(parameter.decoderParameter.rxs2) && Option.when(usingVector)(exRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) { // for setvl memRegRS2 := exRs(1) } - when(exRegDecodeOutput(decoder.isJalr) && csr.io.status.debug) { + when(exRegDecodeOutput(parameter.decoderParameter.isJalr) && csr.io.status.debug) { // flush I$ on D-mode JALR to effect uncached fetch without D$ flush - memRegDecodeOutput(decoder.fenceI) := true.B + memRegDecodeOutput(parameter.decoderParameter.fenceI) := true.B memRegFlushPipe := true.B } } @@ -682,7 +895,7 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val memBreakpoint = (memRegLoad && breakpointUnit.io.xcpt_ld) || (memRegStore && breakpointUnit.io.xcpt_st) val memDebugBreakpoint = (memRegLoad && breakpointUnit.io.debug_ld) || (memRegStore && breakpointUnit.io.debug_st) val (memLoadStoreException, memLoadStoreCause) = checkExceptions( - List((memDebugBreakpoint, CSR.debugTriggerCause.U), (memBreakpoint, Causes.breakpoint.U)) + List((memDebugBreakpoint, parameter.csrParameter.debugTriggerCause.U), (memBreakpoint, Causes.breakpoint.U)) ) val (memException, memCause) = checkExceptions( @@ -693,20 +906,20 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters ) ) - val memCoverCauses = (exCoverCauses ++ List( - (CSR.debugTriggerCause, "DEBUG_TRIGGER"), - (Causes.breakpoint, "BREAKPOINT"), - (Causes.misaligned_fetch, "MISALIGNED_FETCH") - )).distinct - 
coverExceptions(memException, memCause, "MEMORY", memCoverCauses) +// val memCoverCauses = (exCoverCauses ++ List( +// (CSR.debugTriggerCause, "DEBUG_TRIGGER"), +// (Causes.breakpoint, "BREAKPOINT"), +// (Causes.misaligned_fetch, "MISALIGNED_FETCH") +// )).distinct +// coverExceptions(memException, memCause, "MEMORY", memCoverCauses) val dcacheKillMem = - memRegValid && memRegDecodeOutput(decoder.wxd) && dmem.replay_next // structural hazard on writeback port + memRegValid && memRegDecodeOutput(parameter.decoderParameter.wxd) && io.dmem.replay_next // structural hazard on writeback port // TODO: vectorKillMem? - val fpuKillMem = fpu.map(fpu => memRegValid && memRegDecodeOutput(decoder.fp) && fpu.nack_mem) + val fpuKillMem = io.fpu.map(fpu => memRegValid && memRegDecodeOutput(parameter.decoderParameter.fp) && fpu.nack_mem) val replayMem = dcacheKillMem || memRegReplay || fpuKillMem.getOrElse(false.B) val killmCommon = dcacheKillMem || takePcWb || memRegException || !memRegValid - muldiv.io.kill := killmCommon && RegNext(muldiv.io.req.fire) + mulDiv.io.kill := killmCommon && RegNext(mulDiv.io.req.fire) val ctrlKillm = killmCommon || memException || fpuKillMem.getOrElse(false.B) // writeback stage @@ -717,16 +930,16 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters when(memPcValid) { wbRegDecodeOutput := memRegDecodeOutput wbRegSfence := memRegSfence - wbRegWdata := fpu + wbRegWdata := io.fpu .map(fpu => Mux( - !memRegException && memRegDecodeOutput(decoder.fp) && memRegDecodeOutput(decoder.wxd), + !memRegException && memRegDecodeOutput(parameter.decoderParameter.fp) && memRegDecodeOutput(parameter.decoderParameter.wxd), fpu.toint_data, memIntWdata ) ) .getOrElse(memIntWdata) - when(memRegSfence || memRegDecodeOutput(decoder.vector)) { + when(memRegSfence || Option.when(usingVector)(memRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) { wbRegRS2 := memRegRS2 } wbRegCause := memCause @@ -734,26 +947,26 @@ class 
Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters wbRegRawInstruction := memRegRawInstruction wbRegMemSize := memRegMemSize wbRegHlsOrDv := memRegHlsOrDv - wbRegHfenceV := memRegDecodeOutput(decoder.memCommand) === M_HFENCEV - wbRegHfenceG := memRegDecodeOutput(decoder.memCommand) === M_HFENCEG + wbRegHfenceV := memRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEV + wbRegHfenceG := memRegDecodeOutput(parameter.decoderParameter.memCommand) === M_HFENCEG wbRegPc := memRegPc - wbRegWphit := memRegWphit | breakpointUnit.io.bpwatch.map { bpw => - (bpw.rvalid(0) && memRegLoad) || (bpw.wvalid(0) && memRegStore) - } + wbRegWphit.lazyZip(memRegWphit).lazyZip(breakpointUnit.io.bpwatch).foreach {case (wbRegWphit, memRegWphit, bpw) => + wbRegWphit := memRegWphit || ((bpw.rvalid(0) && memRegLoad) || (bpw.wvalid(0) && memRegStore)) + } } val (wbException, wbCause) = checkExceptions( List( (wbRegException, wbRegCause), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.pf.st, Causes.store_page_fault.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.pf.ld, Causes.load_page_fault.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.gf.st, Causes.store_guest_page_fault.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.gf.ld, Causes.load_guest_page_fault.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ae.st, Causes.store_access.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ae.ld, Causes.load_access.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ma.st, Causes.misaligned_store.U), - (wbRegValid && wbRegDecodeOutput(decoder.mem) && dmem.s2_xcpt.ma.ld, Causes.misaligned_load.U) + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.pf.st, Causes.store_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.pf.ld, Causes.load_page_fault.U), + 
(wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.gf.st, Causes.store_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.gf.ld, Causes.load_guest_page_fault.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ae.st, Causes.store_access.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ae.ld, Causes.load_access.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ma.st, Causes.misaligned_store.U), + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.mem) && io.dmem.s2_xcpt.ma.ld, Causes.misaligned_load.U) ) ) @@ -779,58 +992,58 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters ) ) .getOrElse(Seq()) - coverExceptions(wbException, wbCause, "WRITEBACK", wbCoverCauses) +// coverExceptions(wbException, wbCause, "WRITEBACK", wbCoverCauses) val wbPcValid: Bool = wbRegValid || wbRegReplay || wbRegException - val wbWxd: Bool = wbRegValid && wbRegDecodeOutput(decoder.wxd) + val wbWxd: Bool = wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.wxd) val wbSetSboard: Bool = wbDcacheMiss || - Option.when(usingMulDiv)(wbRegDecodeOutput(decoder.div)).getOrElse(false.B) || + Option.when(usingMulDiv)(wbRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || Option .when(usingVector) { // 8. 
set Int scoreboard - wbRegDecodeOutput(decoder.wxd) && wbRegDecodeOutput(decoder.vector) && !wbRegDecodeOutput(decoder.vectorCSR) + wbRegDecodeOutput(parameter.decoderParameter.wxd) && wbRegDecodeOutput(parameter.decoderParameter.vector) && !wbRegDecodeOutput(parameter.decoderParameter.vectorCSR) } .getOrElse(false.B) - val replayWbCommon: Bool = dmem.s2_nack || wbRegReplay + val replayWbCommon: Bool = io.dmem.s2_nack || wbRegReplay val replayWbCsr: Bool = wbRegValid && csr.io.rwStall val replayWb: Bool = replayWbCommon || replayWbCsr takePcWb := replayWb || wbException || csr.io.eret || wbRegFlushPipe // writeback arbitration - val dmemResponseXpu: Bool = !dmem.resp.bits.tag(0).asBool - val dmemResponseFpu: Bool = dmem.resp.bits.tag(0).asBool - val dmemResponseWaddr: UInt = dmem.resp.bits.tag(5, 1) - val dmemResponseValid: Bool = dmem.resp.valid && dmem.resp.bits.has_data - val dmemResponseReplay: Bool = dmemResponseValid && dmem.resp.bits.replay + val dmemResponseXpu: Bool = !io.dmem.resp.bits.tag(0).asBool + val dmemResponseFpu: Bool = io.dmem.resp.bits.tag(0).asBool + val dmemResponseWaddr: UInt = io.dmem.resp.bits.tag(5, 1) + val dmemResponseValid: Bool = io.dmem.resp.valid && io.dmem.resp.bits.has_data + val dmemResponseReplay: Bool = dmemResponseValid && io.dmem.resp.bits.replay - muldiv.io.resp.ready := !wbWxd - val longlatencyWdata: UInt = WireDefault(muldiv.io.resp.bits.data) - val longlatencyWaddress: UInt = WireDefault(muldiv.io.resp.bits.tag) - val longLatencyWenable: Bool = WireDefault(muldiv.io.resp.fire) + mulDiv.io.resp.ready := !wbWxd + val longlatencyWdata: UInt = WireDefault(mulDiv.io.resp.bits.data) + val longlatencyWaddress: UInt = WireDefault(mulDiv.io.resp.bits.tag) + val longLatencyWenable: Bool = WireDefault(mulDiv.io.resp.fire) when(dmemResponseReplay && dmemResponseXpu) { - muldiv.io.resp.ready := false.B + mulDiv.io.resp.ready := false.B longlatencyWaddress := dmemResponseWaddr longLatencyWenable := true.B } val wbValid = wbRegValid && 
!replayWb && !wbException - val wbWen = wbValid && wbRegDecodeOutput(decoder.wxd) + val wbWen = wbValid && wbRegDecodeOutput(parameter.decoderParameter.wxd) // RF is at WB stage val rfWen = wbWen || longLatencyWenable val rfWaddr = Mux(longLatencyWenable, longlatencyWaddress, wbWaddr) val rfWdata = Mux( dmemResponseValid && dmemResponseXpu, - dmem.resp.bits.data(xLen - 1, 0), + io.dmem.resp.bits.data(xLen - 1, 0), Mux( longLatencyWenable, longlatencyWdata, Mux( - (wbRegDecodeOutput(decoder.csr) =/= CSR.N) || wbRegDecodeOutput(decoder.vectorCSR), + (wbRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N) || Option.when(usingVector)(wbRegDecodeOutput(parameter.decoderParameter.vectorCSR)).getOrElse(false.B), csr.io.rw.rdata, Mux( - Option.when(usingMulDiv && pipelinedMul)(wbRegDecodeOutput(decoder.mul)).getOrElse(false.B), + Option.when(usingMulDiv && pipelinedMul)(wbRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B), mul.map(_.io.resp.bits.data).getOrElse(wbRegWdata), wbRegWdata ) @@ -840,7 +1053,7 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters when(rfWen) { rf.write(rfWaddr, rfWdata) } // hook up control/status regfile - csr.io.ungatedClock := clock + csr.io.ungatedClock := io.clock csr.io.decode(0).inst := idInstruction csr.io.exception := wbException csr.io.cause := wbCause @@ -849,72 +1062,73 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters if (usingCompressed) Cat(Mux(wbRegRawInstruction(1, 0).andR, wbRegInstruction >> 16, 0.U), wbRegRawInstruction(15, 0)) else wbRegInstruction - ) - csr.io.interrupts := interrupts - csr.io.hartid := hartid - fpu.map { fpu => + ) + csr.io.interrupts.tileInterrupts := io.interrupts + csr.io.interrupts.buserror.foreach( _ := io.buserror ) + csr.io.hartid := io.hartid + io.fpu.map { fpu => fpu.fcsr_rm := csr.io.fcsrRm csr.io.fcsrFlags := fpu.fcsr_flags fpu.time := csr.io.time(31, 0) - fpu.hartid := hartid + fpu.hartid 
:= io.hartid }.getOrElse { csr.io.fcsrFlags := DontCare } csr.io.pc := wbRegPc val tvalDmemAddr = !wbRegException val tvalAnyAddr = tvalDmemAddr || - wbRegCause.isOneOf( + isOneOf(wbRegCause, Seq( Causes.breakpoint.U, Causes.fetch_access.U, Causes.fetch_page_fault.U, Causes.fetch_guest_page_fault.U - ) + )) val tvalInstruction = wbRegCause === Causes.illegal_instruction.U val tvalValid = wbException && (tvalAnyAddr || tvalInstruction) csr.io.gva := wbException && (tvalAnyAddr && csr.io.status.v || tvalDmemAddr && wbRegHlsOrDv) csr.io.tval := Mux(tvalValid, encodeVirtualAddress(wbRegWdata, wbRegWdata), 0.U) csr.io.htval := { val htvalValidImem = wbRegException && wbRegCause === Causes.fetch_guest_page_fault.U - val htvalImem = Mux(htvalValidImem, imem.gpa.bits, 0.U) - assert(!htvalValidImem || imem.gpa.valid) + val htvalImem = Mux(htvalValidImem, io.imem.gpa.bits, 0.U) + assert(!htvalValidImem || io.imem.gpa.valid) val htvalValidDmem = - wbException && tvalDmemAddr && dmem.s2_xcpt.gf.asUInt.orR && !dmem.s2_xcpt.pf.asUInt.orR - val htvalDmem = Mux(htvalValidDmem, dmem.s2_gpa, 0.U) + wbException && tvalDmemAddr && io.dmem.s2_xcpt.gf.asUInt.orR && !io.dmem.s2_xcpt.pf.asUInt.orR + val htvalDmem = Mux(htvalValidDmem, io.dmem.s2_gpa, 0.U) (htvalDmem | htvalImem) >> hypervisorExtraAddrBits } - ptw.ptbr := csr.io.ptbr - ptw.hgatp := csr.io.hgatp - ptw.vsatp := csr.io.vsatp - ptw.customCSRs.csrs.zip(csr.io.customCSRs).foreach { case (lhs, rhs) => lhs <> rhs } - ptw.status := csr.io.status - ptw.hstatus := csr.io.hstatus - ptw.gstatus := csr.io.gstatus - ptw.pmp := csr.io.pmp + io.ptw.ptbr := csr.io.ptbr + io.ptw.hgatp := csr.io.hgatp + io.ptw.vsatp := csr.io.vsatp +// io.ptw.customCSRs.csrs.zip(csr.io.customCSRs).foreach { case (lhs, rhs) => lhs <> rhs } + io.ptw.status := csr.io.status + io.ptw.hstatus := csr.io.hstatus + io.ptw.gstatus := csr.io.gstatus + io.ptw.pmp := csr.io.pmp csr.io.rw.addr := wbRegInstruction(31, 20) - csr.io.rw.cmd := CSR.maskCmd(wbRegValid, 
wbRegDecodeOutput(decoder.csr)) + csr.io.rw.cmd := parameter.csrParameter.maskCmd(wbRegValid, wbRegDecodeOutput(parameter.decoderParameter.csr)) csr.io.rw.wdata := wbRegWdata - csr.io.vectorCsr.foreach(_ := wbRegDecodeOutput(decoder.vectorCSR)) + csr.io.vectorCsr.foreach(_ := wbRegDecodeOutput(parameter.decoderParameter.vectorCSR)) csr.io.wbRegRS2.foreach(_ := wbRegRS2) - bpwatch.zip(wbRegWphit).zip(csr.io.bp) - bpwatch.lazyZip(wbRegWphit).lazyZip(csr.io.bp).foreach { + io.bpwatch.zip(wbRegWphit).zip(csr.io.bp) + io.bpwatch.lazyZip(wbRegWphit).lazyZip(csr.io.bp).foreach { case (iobpw, wphit, bp) => - iobpw.valid(0) := wphit + iobpw.valid := wphit iobpw.action := bp.control.action // tie off bpwatch valids - iobpw.rvalid.foreach(_ := false.B) - iobpw.wvalid.foreach(_ := false.B) - iobpw.ivalid.foreach(_ := false.B) + iobpw.rvalid := false.B + iobpw.wvalid := false.B + iobpw.ivalid := false.B } val hazardTargets = Seq( - (idDecodeOutput(decoder.rxs1) && idRaddr1 =/= 0.U, idRaddr1), - (idDecodeOutput(decoder.rxs2) && idRaddr2 =/= 0.U, idRaddr2), - (idDecodeOutput(decoder.wxd) && idWaddr =/= 0.U, idWaddr) + (idDecodeOutput(parameter.decoderParameter.rxs1) && idRaddr1 =/= 0.U, idRaddr1), + (idDecodeOutput(parameter.decoderParameter.rxs2) && idRaddr2 =/= 0.U, idRaddr2), + (idDecodeOutput(parameter.decoderParameter.wxd) && idWaddr =/= 0.U, idWaddr) ) - val fpHazardTargets = fpu.map(fpu => + val fpHazardTargets = io.fpu.map(fpu => Seq( (fpu.dec.ren1, idRaddr1), (fpu.dec.ren2, idRaddr2), @@ -927,9 +1141,9 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters scoreboard.clear(longLatencyWenable, longlatencyWaddress) def idScoreboardClearBypass(r: UInt): Bool = { // ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check - if (tileParams.dcache.get.dataECC.isEmpty) longLatencyWenable && longlatencyWaddress === r + if (!hasDataECC) longLatencyWenable && longlatencyWaddress === r else - muldiv.io.resp.fire && muldiv.io.resp.bits.tag 
=== r || dmemResponseReplay && dmemResponseXpu && dmemResponseWaddr === r + mulDiv.io.resp.fire && mulDiv.io.resp.bits.tag === r || dmemResponseReplay && dmemResponseXpu && dmemResponseWaddr === r } val idScoreboardHazard: Bool = checkHazards(hazardTargets, rd => scoreboard.read(rd) && !idScoreboardClearBypass(rd)) @@ -937,15 +1151,15 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. val exCannotBypass: Bool = - exRegDecodeOutput(decoder.csr) =/= CSR.N || - exRegDecodeOutput(decoder.isJalr) || - exRegDecodeOutput(decoder.mem) || - Option.when(usingMulDiv && pipelinedMul)(exRegDecodeOutput(decoder.mul)).getOrElse(false.B) || - Option.when(usingMulDiv)(exRegDecodeOutput(decoder.div)).getOrElse(false.B) || - Option.when(usingFPU)(exRegDecodeOutput(decoder.fp)).getOrElse(false.B) - val dataHazardEx: Bool = exRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === exWaddr) + exRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N || + exRegDecodeOutput(parameter.decoderParameter.isJalr) || + exRegDecodeOutput(parameter.decoderParameter.mem) || + Option.when(usingMulDiv && pipelinedMul)(exRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(exRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || + Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B) + val dataHazardEx: Bool = exRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === exWaddr) val fpDataHazardEx: Option[Bool] = fpHazardTargets.map(fpHazardTargets => - idDecodeOutput(decoder.fp) && exRegDecodeOutput(decoder.wfd) && checkHazards(fpHazardTargets, _ === exWaddr) + idDecodeOutput(parameter.decoderParameter.fp) && exRegDecodeOutput(parameter.decoderParameter.wfd) && checkHazards(fpHazardTargets, _ === exWaddr) ) val idExHazard: Bool = 
exRegValid && (dataHazardEx && exCannotBypass || fpDataHazardEx.getOrElse(false.B)) @@ -955,42 +1169,38 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters if (fastLoadWord) (!fastLoadByte).B && memRegSlowBypass else true.B val memCannotBypass: Bool = - memRegDecodeOutput(decoder.csr) =/= CSR.N || - memRegDecodeOutput(decoder.mem) && memMemCmdBh || - Option.when(usingMulDiv && pipelinedMul)(memRegDecodeOutput(decoder.mul)).getOrElse(false.B) || - Option.when(usingMulDiv)(memRegDecodeOutput(decoder.div)).getOrElse(false.B) || - Option.when(usingFPU)(memRegDecodeOutput(decoder.fp)).getOrElse(false.B) - val dataHazardMem: Bool = memRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === memWaddr) + memRegDecodeOutput(parameter.decoderParameter.csr) =/= parameter.csrParameter.N || + memRegDecodeOutput(parameter.decoderParameter.mem) && memMemCmdBh || + Option.when(usingMulDiv && pipelinedMul)(memRegDecodeOutput(parameter.decoderParameter.mul)).getOrElse(false.B) || + Option.when(usingMulDiv)(memRegDecodeOutput(parameter.decoderParameter.div)).getOrElse(false.B) || + Option.when(usingFPU)(memRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B) + val dataHazardMem: Bool = memRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === memWaddr) val fpDataHazardMem: Option[Bool] = fpHazardTargets.map(fpHazardTargets => - idDecodeOutput(decoder.fp) && - memRegDecodeOutput(decoder.wfd) && + idDecodeOutput(parameter.decoderParameter.fp) && + memRegDecodeOutput(parameter.decoderParameter.wfd) && checkHazards(fpHazardTargets, _ === memWaddr) ) val idMemHazard: Bool = memRegValid && (dataHazardMem && memCannotBypass || fpDataHazardMem.getOrElse(false.B)) - idLoadUse := memRegValid && dataHazardMem && memRegDecodeOutput(decoder.mem) + idLoadUse := memRegValid && dataHazardMem && memRegDecodeOutput(parameter.decoderParameter.mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in 
writeback. - val dataHazardWb: Bool = wbRegDecodeOutput(decoder.wxd) && checkHazards(hazardTargets, _ === wbWaddr) + val dataHazardWb: Bool = wbRegDecodeOutput(parameter.decoderParameter.wxd) && checkHazards(hazardTargets, _ === wbWaddr) val fpDataHazardWb: Bool = fpHazardTargets .map(fpHazardTargets => - idDecodeOutput(decoder.fp) && - wbRegDecodeOutput(decoder.wfd) && + idDecodeOutput(parameter.decoderParameter.fp) && + wbRegDecodeOutput(parameter.decoderParameter.wfd) && checkHazards(fpHazardTargets, _ === wbWaddr) ) .getOrElse(false.B) val idWbHazard: Bool = wbRegValid && (dataHazardWb && wbSetSboard || fpDataHazardWb) val idStallFpu: Bool = - fpu + io.fpu .zip(fpHazardTargets) .map { case (fpu, fpHazardTargets) => val fpScoreboard = new Scoreboard(32) // 8. set FP scoreboard - fpScoreboard.set(((wbDcacheMiss || wbRegDecodeOutput(decoder.vector)) && wbRegDecodeOutput(decoder.wfd) || fpu.sboard_set) && wbValid, wbWaddr) + fpScoreboard.set(((wbDcacheMiss || Option.when(usingVector)(wbRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) && wbRegDecodeOutput(parameter.decoderParameter.wfd) || fpu.sboard_set) && wbValid, wbWaddr) fpScoreboard.clear(dmemResponseReplay && dmemResponseFpu, dmemResponseWaddr) - t1Response.foreach { response => - val vectorTryToWriteFP = response.bits.rd.valid && response.bits.float - fpScoreboard.clear(response.fire && vectorTryToWriteFP, response.bits.rd.bits) - } fpScoreboard.clear(fpu.sboard_clr, fpu.sboard_clra) checkHazards(fpHazardTargets, fpScoreboard.read) } @@ -999,8 +1209,8 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val dcacheBlocked: Bool = { // speculate that a blocked D$ will unblock the cycle after a Grant val blocked = Reg(Bool()) - blocked := !dmem.req.ready && dmem.clock_enabled && !dmem.perf.grant && (blocked || dmem.req.valid || dmem.s2_nack) - blocked && !dmem.perf.grant + blocked := !io.dmem.req.ready && io.dmem.clock_enabled && !io.dmem.perf.grant && 
(blocked || io.dmem.req.valid || io.dmem.s2_nack) + blocked && !io.dmem.perf.grant } // vector stall @@ -1008,34 +1218,34 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters val vectorQueueFull: Option[Bool] = Option.when(usingVector)(Wire(Bool())) val vectorStall: Option[Bool] = Option.when(usingVector) { val vectorLSUNotClear = - (exRegValid && exRegDecodeOutput(decoder.vectorLSU)) || - (memRegValid && memRegDecodeOutput(decoder.vectorLSU)) || - (wbRegValid && wbRegDecodeOutput(decoder.vectorLSU)) || + (exRegValid && exRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || + (memRegValid && memRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || + (wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)) || !vectorLSUEmpty.get // Vector instruction queue is full // TODO: need cover. - (idDecodeOutput(decoder.vector) && vectorQueueFull.get) || - // There is an outstanding LSU. - (idDecodeOutput(decoder.mem) && !idDecodeOutput(decoder.vector) && vectorLSUNotClear) + (idDecodeOutput(parameter.decoderParameter.vector) && vectorQueueFull.get) || + // There is an outstanding LSU. 
+ (idDecodeOutput(parameter.decoderParameter.mem) && !idDecodeOutput(parameter.decoderParameter.vector) && vectorLSUNotClear) } // TODO: vector stall val ctrlStalld: Bool = idExHazard || idMemHazard || idWbHazard || idScoreboardHazard || idDoFence || idRegPause || csr.io.csrStall || csr.io.singleStep && (exRegValid || memRegValid || wbRegValid) || - idCsrEn && csr.io.decode(0).fpCsr && !fpu.map(_.fcsr_rdy).getOrElse(false.B) || traceStall || + idCsrEn && csr.io.decode(0).fpCsr && !io.fpu.map(_.fcsr_rdy).getOrElse(false.B) || io.traceStall || !clockEnable || - Option.when(usingFPU)(idDecodeOutput(decoder.fp) && idStallFpu).getOrElse(false.B) || - idDecodeOutput(decoder.mem) && dcacheBlocked || // reduce activity during D$ misses + Option.when(usingFPU)(idDecodeOutput(parameter.decoderParameter.fp) && idStallFpu).getOrElse(false.B) || + idDecodeOutput(parameter.decoderParameter.mem) && dcacheBlocked || // reduce activity during D$ misses Option .when(usingMulDiv)( idDecodeOutput( - decoder.div - ) && (!(muldiv.io.req.ready || (muldiv.io.resp.valid && !wbWxd)) || muldiv.io.req.valid) + parameter.decoderParameter.div + ) && (!(mulDiv.io.req.ready || (mulDiv.io.resp.valid && !wbWxd)) || mulDiv.io.req.valid) ) .getOrElse(false.B) || // reduce odds of replay - // TODO: vectorStall is large, we may need it to gate the scalar core. - vectorStall.getOrElse(false.B) + // TODO: vectorStall is large, we may need it to gate the scalar core. 
+ vectorStall.getOrElse(false.B) ctrlKilled := // IBUF not bubble @@ -1048,10 +1258,10 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters ctrlStalld || csr.io.interrupt - imem.req.valid := takePc - imem.req.bits.speculative := !takePcWb + io.imem.req.valid := takePc + io.imem.req.bits.speculative := !takePcWb // flush or branch misprediction - imem.req.bits.pc := Mux( + io.imem.req.bits.pc := Mux( wbException || csr.io.eret, csr.io.evec, // exception or [m|s]ret Mux( @@ -1060,188 +1270,120 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters memNextPC ) ) - imem.flush_icache := wbRegValid && wbRegDecodeOutput(decoder.fenceI) && !dmem.s2_nack - imem.might_request := { - imemMightRequestReg := exPcValid || memPcValid || ptw.customCSRs.disableICacheClockGate + io.imem.flush_icache := wbRegValid && wbRegDecodeOutput(parameter.decoderParameter.fenceI) && !io.dmem.s2_nack + io.imem.might_request := { + imemMightRequestReg := exPcValid || memPcValid /*|| io.ptw.customCSRs.disableICacheClockGate*/ imemMightRequestReg } - imem.progress := RegNext(wbRegValid && !replayWbCommon) - imem.sfence.valid := wbRegValid && wbRegSfence - imem.sfence.bits.rs1 := wbRegMemSize(0) - imem.sfence.bits.rs2 := wbRegMemSize(1) - imem.sfence.bits.addr := wbRegWdata - imem.sfence.bits.asid := wbRegRS2 - imem.sfence.bits.hv := wbRegHfenceV - imem.sfence.bits.hg := wbRegHfenceG - ptw.sfence := imem.sfence + io.imem.progress := RegNext(wbRegValid && !replayWbCommon) + io.imem.sfence.valid := wbRegValid && wbRegSfence + io.imem.sfence.bits.rs1 := wbRegMemSize(0) + io.imem.sfence.bits.rs2 := wbRegMemSize(1) + io.imem.sfence.bits.addr := wbRegWdata + io.imem.sfence.bits.asid := wbRegRS2 + io.imem.sfence.bits.hv := wbRegHfenceV + io.imem.sfence.bits.hg := wbRegHfenceG + io.ptw.sfence := io.imem.sfence instructionBufferOut.ready := !ctrlStalld - imem.btb_update.valid := memRegValid && !takePcWb && memWrongNpc && (!memCfi || memCfiTaken) - 
imem.btb_update.bits.isValid := memCfi - imem.btb_update.bits.cfiType := + io.imem.btb_update.valid := memRegValid && !takePcWb && memWrongNpc && (!memCfi || memCfiTaken) + io.imem.btb_update.bits.isValid := memCfi + io.imem.btb_update.bits.cfiType := Mux( - (memRegDecodeOutput(decoder.isJal) || memRegDecodeOutput(decoder.isJalr)) && memWaddr(0), + (memRegDecodeOutput(parameter.decoderParameter.isJal) || memRegDecodeOutput(parameter.decoderParameter.isJalr)) && memWaddr(0), CFIType.call, Mux( - memRegDecodeOutput(decoder.isJalr) && (memRegInstruction(19, 15) & regAddrMask.U) === BitPat("b00?01"), + memRegDecodeOutput(parameter.decoderParameter.isJalr) && (memRegInstruction(19, 15) & regAddrMask.U) === BitPat("b00?01"), CFIType.ret, - Mux(memRegDecodeOutput(decoder.isJal) || memRegDecodeOutput(decoder.isJalr), CFIType.jump, CFIType.branch) + Mux(memRegDecodeOutput(parameter.decoderParameter.isJal) || memRegDecodeOutput(parameter.decoderParameter.isJalr), CFIType.jump, CFIType.branch) ) ) - imem.btb_update.bits.target := imem.req.bits.pc - imem.btb_update.bits.br_pc := (if (usingCompressed) memRegPc + Mux(memRegRVC, 0.U, 2.U) else memRegPc) - imem.btb_update.bits.pc := ~(~imem.btb_update.bits.br_pc | (coreInstBytes * fetchWidth - 1).U) - imem.btb_update.bits.prediction := memRegBTBResponse - imem.btb_update.bits.taken := DontCare - - imem.bht_update.valid := memRegValid && !takePcWb - imem.bht_update.bits.pc := imem.btb_update.bits.pc - imem.bht_update.bits.taken := memBranchTaken - imem.bht_update.bits.mispredict := memWrongNpc - imem.bht_update.bits.branch := memRegDecodeOutput(decoder.isBranch) - imem.bht_update.bits.prediction := memRegBTBResponse.bht + io.imem.btb_update.bits.target := io.imem.req.bits.pc + io.imem.btb_update.bits.br_pc := (if (usingCompressed) memRegPc + Mux(memRegRVC, 0.U, 2.U) else memRegPc) + io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes * fetchWidth - 1).U) + io.imem.btb_update.bits.prediction := 
memRegBTBResponse + io.imem.btb_update.bits.taken := DontCare + + io.imem.bht_update.valid := memRegValid && !takePcWb + io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc + io.imem.bht_update.bits.taken := memBranchTaken + io.imem.bht_update.bits.mispredict := memWrongNpc + io.imem.bht_update.bits.branch := memRegDecodeOutput(parameter.decoderParameter.isBranch) + io.imem.bht_update.bits.prediction := memRegBTBResponse.bht // Connect RAS in Frontend - imem.ras_update := DontCare + io.imem.ras_update := DontCare - fpu.foreach { fpu => - fpu.valid := !ctrlKilled && idDecodeOutput(decoder.fp) + io.fpu.foreach { fpu => + fpu.valid := !ctrlKilled && idDecodeOutput(parameter.decoderParameter.fp) fpu.killx := ctrlKillx fpu.killm := killmCommon fpu.inst := idInstruction fpu.fromint_data := exRs(0) fpu.dmem_resp_val := dmemResponseValid && dmemResponseFpu - fpu.dmem_resp_data := (if (minFLen == 32) dmem.resp.bits.data_word_bypass else dmem.resp.bits.data) - fpu.dmem_resp_type := dmem.resp.bits.size + fpu.dmem_resp_data := (if (minFLen == 32) io.dmem.resp.bits.data_word_bypass else io.dmem.resp.bits.data) + fpu.dmem_resp_type := io.dmem.resp.bits.size fpu.dmem_resp_tag := dmemResponseWaddr - fpu.keep_clock_enabled := ptw.customCSRs.disableCoreClockGate +// fpu.keep_clock_enabled := io.ptw.customCSRs.disableCoreClockGate + fpu.keep_clock_enabled := false.B } - t1Request.foreach { t1 => - // Send instruction to T1 when write back. 
- t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(decoder.vector) - t1.bits.instruction := wbRegInstruction - t1.bits.rs1Data := wbRegWdata - t1.bits.rs2Data := wbRegRS2 - - val response: DecoupledIO[VectorResponse] = t1Response.get - - // TODO: make it configurable - val maxCount: Int = 32 - val countWidth = log2Up(maxCount) - - def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { - val counter: UInt = RegInit(0.U(size.W)) - val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) - val updateCounter = grant ^ release - when(updateCounter) { - counter := nextCount - } - flush.foreach(f => when(f)(counter := 0.U)) - val empty = (updateCounter && nextCount === 0.U) || counter === 0.U - val fullCounter: Int = (1 << size) - 1 - margin - val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U - (empty, full) - } - // Maintain lsu counter - val lsuGrant: Bool = t1.valid && wbRegDecodeOutput(decoder.vectorLSU) - val lsuRelease: Bool = response.fire && response.bits.mem - val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease) - // Maintain vector counter - // There may be 4 instructions in the pipe - val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get) - vectorLSUEmpty.foreach(_ := lsuEmpty) - vectorQueueFull.foreach(_ := vectorFull) - } - // todo: vector change csr - t1Response.foreach { vectorResponse => - val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float - val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float - vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && - (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) - when(vectorResponse.fire && vectorTryToWriteRd) { - longlatencyWdata := vectorResponse.bits.data - longlatencyWaddress := vectorResponse.bits.rd.bits 
- longLatencyWenable := true.B - } - fpu.foreach { fpu => - when(!(dmemResponseValid && dmemResponseFpu)) { - fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP - fpu.dmem_resp_data := vectorResponse.bits.data - // todo: 32 bit only - fpu.dmem_resp_type := 2.U - // todo: connect tag - fpu.dmem_resp_tag := 0.U - } - } - } - - dmem.req.valid := exRegValid && exRegDecodeOutput(decoder.mem) - val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(decoder.fp)).getOrElse(false.B)) - require(coreParams.dcacheReqTagBits >= ex_dcache_tag.getWidth) - dmem.req.bits.tag := ex_dcache_tag - dmem.req.bits.cmd := exRegDecodeOutput(decoder.memCommand) - dmem.req.bits.size := exRegMemSize - dmem.req.bits.signed := !Mux(exRegHLS, exRegInstruction(20), exRegInstruction(14)) - dmem.req.bits.phys := false.B - dmem.req.bits.addr := encodeVirtualAddress(exRs(0), arithmeticLogicUnit.io.adder_out) - dmem.req.bits.idx.foreach(_ := dmem.req.bits.addr) - dmem.req.bits.dprv := Mux(exRegHLS, csr.io.hstatus.spvp, csr.io.status.dprv) - dmem.req.bits.dv := exRegHLS || csr.io.status.dv - dmem.req.bits.no_alloc := DontCare - dmem.req.bits.no_xcpt := DontCare - dmem.req.bits.data := DontCare - dmem.req.bits.mask := DontCare - dmem.s1_data.data := fpu - .map(fpu => Mux(memRegDecodeOutput(decoder.fp), Fill(xLen.max(fLen) / fLen, fpu.store_data), memRegRS2)) + io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem) + val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B)) +// require(coreParams.dcacheReqTagBits >= ex_dcache_tag.getWidth) + io.dmem.req.bits.tag := ex_dcache_tag + io.dmem.req.bits.cmd := exRegDecodeOutput(parameter.decoderParameter.memCommand) + io.dmem.req.bits.size := exRegMemSize + io.dmem.req.bits.signed := !Mux(exRegHLS, exRegInstruction(20), exRegInstruction(14)) + io.dmem.req.bits.phys := false.B + io.dmem.req.bits.addr := encodeVirtualAddress(exRs(0), 
alu.io.adder_out) + io.dmem.req.bits.idx.foreach(_ := io.dmem.req.bits.addr) + io.dmem.req.bits.dprv := Mux(exRegHLS, csr.io.hstatus.spvp, csr.io.status.dprv) + io.dmem.req.bits.dv := exRegHLS || csr.io.status.dv + io.dmem.req.bits.no_alloc := DontCare + io.dmem.req.bits.no_xcpt := DontCare + io.dmem.req.bits.data := DontCare + io.dmem.req.bits.mask := DontCare + io.dmem.s1_data.data := io.fpu + .map(fpu => Mux(memRegDecodeOutput(parameter.decoderParameter.fp), Fill(xLen.max(fLen.get) / fLen.get, fpu.store_data), memRegRS2)) .getOrElse(memRegRS2) - dmem.s1_data.mask := DontCare + io.dmem.s1_data.mask := DontCare - dmem.s1_kill := killmCommon || memLoadStoreException || fpuKillMem.getOrElse(false.B) - dmem.s2_kill := false.B + io.dmem.s1_kill := killmCommon || memLoadStoreException || fpuKillMem.getOrElse(false.B) + io.dmem.s2_kill := false.B // don't let D$ go to sleep if we're probably going to use it soon - dmem.keep_clock_enabled := instructionBufferOut.valid && idDecodeOutput(decoder.mem) && !csr.io.csrStall + io.dmem.keep_clock_enabled := instructionBufferOut.valid && idDecodeOutput(parameter.decoderParameter.mem) && !csr.io.csrStall // gate the clock val unpause: Bool = - csr.io.time(rocketParams.lgPauseCycles - 1, 0) === 0.U || csr.io.inhibitCycle || dmem.perf.release || takePc + csr.io.time(rocketParams.lgPauseCycles - 1, 0) === 0.U || csr.io.inhibitCycle || io.dmem.perf.release || takePc when(unpause) { idRegPause := false.B } - cease := csr.io.status.cease && !clockEnableReg - wfi := csr.io.status.wfi + io.cease := csr.io.status.cease && !clockEnableReg + io.wfi := csr.io.status.wfi if (rocketParams.clockGate) { - longLatencyStall := csr.io.csrStall || dmem.perf.blocked || idRegPause && !unpause - clockEnable := clockEnableReg || exPcValid || (!longLatencyStall && imem.resp.valid) + longLatencyStall := csr.io.csrStall || io.dmem.perf.blocked || idRegPause && !unpause + clockEnable := clockEnableReg || exPcValid || (!longLatencyStall && io.imem.resp.valid) 
clockEnableReg := exPcValid || memPcValid || wbPcValid || // instruction in flight - ptw.customCSRs.disableCoreClockGate || // chicken bit - !muldiv.io.req.ready || // mul/div in flight - fpu.map(!_.fcsr_rdy).getOrElse(false.B) || // long-latency FPU in flight - dmem.replay_next || // long-latency load replaying - (!longLatencyStall && (instructionBufferOut.valid || imem.resp.valid)) // instruction pending +// io.ptw.customCSRs.disableCoreClockGate || // chicken bit + !mulDiv.io.req.ready || // mul/div in flight + io.fpu.map(!_.fcsr_rdy).getOrElse(false.B) || // long-latency FPU in flight + io.dmem.replay_next || // long-latency load replaying + (!longLatencyStall && (instructionBufferOut.valid || io.imem.resp.valid)) // instruction pending assert(!(exPcValid || memPcValid || wbPcValid) || clockEnable) } // evaluate performance counters - val icacheBlocked = !(imem.resp.valid || RegNext(imem.resp.valid)) - csr.io.counters.foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } + val icacheBlocked = !(io.imem.resp.valid || RegNext(io.imem.resp.valid)) + // todo: perfEvents here. 
+// csr.io.counters.foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } } def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_ || _), PriorityMux(x)) - def coverExceptions( - exceptionValid: Bool, - cause: UInt, - labelPrefix: String, - coverCausesLabels: Seq[(Int, String)] - ): Unit = { - for ((coverCause, label) <- coverCausesLabels) { - property.cover(exceptionValid && (cause === coverCause.U), s"${labelPrefix}_${label}") - } - } - def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_ || _) @@ -1272,12 +1414,13 @@ class Rocket(flushOnFenceI: Boolean, hasBeu: Boolean)(implicit val p: Parameters when(ens) { _r := _next } } } + } class RegFile(n: Int, w: Int, zero: Boolean = false) { - val rf = Mem(n, UInt(w.W)) - private def access(addr: UInt) = rf(~addr(log2Up(n) - 1, 0)) - private val reads = ArrayBuffer[(UInt, UInt)]() + val rf: Mem[UInt] = Mem(n, UInt(w.W)) + private def access(addr: UInt): UInt = rf(~addr(log2Ceil(n) - 1, 0)) + private val reads = collection.mutable.ArrayBuffer[(UInt, UInt)]() private var canRead = true def read(addr: UInt) = { require(canRead) @@ -1294,25 +1437,3 @@ class RegFile(n: Int, w: Int, zero: Boolean = false) { } } } - -object ImmGen { - def apply(sel: UInt, inst: UInt) = { - val sign = Mux(sel === IMM_Z, 0.S, inst(31).asSInt) - val b30_20 = Mux(sel === IMM_U, inst(30, 20).asSInt, sign) - val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19, 12).asSInt) - val b11 = Mux( - sel === IMM_U || sel === IMM_Z, - 0.S, - Mux(sel === IMM_UJ, inst(20).asSInt, Mux(sel === IMM_SB, inst(7).asSInt, sign)) - ) - val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, 0.U, inst(30, 25)) - val b4_1 = Mux( - sel === IMM_U, - 0.U, - Mux(sel === IMM_S || sel === IMM_SB, inst(11, 8), Mux(sel === IMM_Z, inst(19, 16), inst(24, 21))) - ) - val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Mux(sel === IMM_Z, inst(15), 0.U))) - - Cat(sign, b30_20, b19_12, 
b11, b10_5, b4_1, b0).asSInt - } -} From 2a2cadb4770a66e26081816eb62d278a5444f849 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 3 Jul 2024 16:34:12 +0800 Subject: [PATCH 078/140] [rocketv] add elaborator for Rocket - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Rocket config --useAsyncReset true --clockGate true --instructionSets rv32_i --vLen 0 --usingUser false --hartIdLen 1 --nPMPs 8 --asidBits 0 --nBreakpoints 1 --usingBTB false --useBPWatch false --mcontextWidth 0 --scontextWidth 0 --mulDivLantency 2 --divUnroll 1 --divEarlyOut false --divEarlyOutGranularity 1 --mulUnroll 1 --mulEarlyOut false --paddrBits 32 --cacheBlockBytes 32 --hasBeu false --fenceIFlushDCache false --fastLoadByte false --fastLoadWord false --dcacheNSets 64 --flushOnFenceI false - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.Rocket design --parameter ./Rocket.json --run-firtool --- elaborator/src/rocketv/Rocket.scala | 78 +++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 elaborator/src/rocketv/Rocket.scala diff --git a/elaborator/src/rocketv/Rocket.scala b/elaborator/src/rocketv/Rocket.scala new file mode 100644 index 000000000..99f90d17a --- /dev/null +++ b/elaborator/src/rocketv/Rocket.scala @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import mainargs._ +import org.chipsalliance.rocketv.{Rocket, RocketParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +object Rocket extends Elaborator { + @main + case class RocketParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = "instructionSets") instructionSets: Set[String], + @arg(name = "vLen") vLen: Int, + @arg(name = "usingUser") usingUser: Boolean, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "nPMPs") nPMPs: Int, + 
@arg(name = "asidBits") asidBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "usingBTB") usingBTB: Boolean, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "mulDivLantency") mulDivLantency: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "hasBeu") hasBeu: Boolean, + @arg(name = "fastLoadByte") fastLoadByte: Boolean, + @arg(name = "fastLoadWord") fastLoadWord: Boolean, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "flushOnFenceI") flushOnFenceI: Boolean) { + def convert: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets, + vLen, + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLantency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI + ) + } + + implicit def RocketParameterMainParser: ParserForClass[RocketParameterMain] = ParserForClass[RocketParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RocketParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[Rocket, RocketParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From f6fdacbbe9c2663744835168c957cce0ba6c7c9d Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 3 Jul 
2024 16:44:29 +0800 Subject: [PATCH 079/140] [rocketv] copy RocketTile into rocketv project --- rocketv/src/RocketTile.scala | 217 +++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 rocketv/src/RocketTile.scala diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala new file mode 100644 index 000000000..9d4c460d6 --- /dev/null +++ b/rocketv/src/RocketTile.scala @@ -0,0 +1,217 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package freechips.rocketchip.tile + +import chisel3._ +import org.chipsalliance.cde.config._ +import freechips.rocketchip.devices.tilelink._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.interrupts._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.rocket._ +import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike +import freechips.rocketchip.util._ +import freechips.rocketchip.prci.{ClockSinkParameters} + +case class RocketTileBoundaryBufferParams(force: Boolean = false) + +case class RocketTileParams( + core: RocketCoreParams = RocketCoreParams(), + icache: Option[ICacheParams] = Some(ICacheParams()), + dcache: Option[DCacheParams] = Some(DCacheParams()), + btb: Option[BTBParams] = Some(BTBParams()), + dataScratchpadBytes: Int = 0, + tileId: Int = 0, + beuAddr: Option[BigInt] = None, + blockerCtrlAddr: Option[BigInt] = None, + clockSinkParams: ClockSinkParameters = ClockSinkParameters(), + boundaryBuffers: Option[RocketTileBoundaryBufferParams] = None + ) extends InstantiableTileParams[RocketTile] { + require(icache.isDefined) + require(dcache.isDefined) + val baseName = "rockettile" + val uniqueName = s"${baseName}_$tileId" + def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters): RocketTile = { + new RocketTile(this, crossing, lookup) + } +} + +class RocketTile private( + val rocketParams: RocketTileParams, + crossing: 
ClockCrossingType, + lookup: LookupByHartIdImpl, + q: Parameters) + extends BaseTile(rocketParams, crossing, lookup, q) + with SinksExternalInterrupts + with SourcesExternalNotifications + with HasLazyRoCC // implies CanHaveSharedFPU with CanHavePTW with HasHellaCache + with HasHellaCache + with HasICacheFrontend +{ + // Private constructor ensures altered LazyModule.p is used implicitly + def this(params: RocketTileParams, crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters) = + this(params, crossing.crossingType, lookup, p) + + val intOutwardNode = rocketParams.beuAddr map { _ => IntIdentityNode() } + val slaveNode = TLIdentityNode() + val masterNode = visibilityNode + + val dtim_adapter = tileParams.dcache.flatMap { d => d.scratch.map { s => + LazyModule(new ScratchpadSlavePort(AddressSet.misaligned(s, d.dataScratchpadBytes), lazyCoreParamsView.coreDataBytes, tileParams.core.useAtomics && !tileParams.core.useAtomicsOnlyForIO)) + }} + dtim_adapter.foreach(lm => connectTLSlave(lm.node, lm.node.portParams.head.beatBytes)) + + val bus_error_unit = rocketParams.beuAddr map { a => + val beu = LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) + intOutwardNode.get := beu.intNode + connectTLSlave(beu.node, xBytes) + beu + } + + val tile_master_blocker = + tileParams.blockerCtrlAddr + .map(BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true)) + .map(bp => LazyModule(new BasicBusBlocker(bp))) + + tile_master_blocker.foreach(lm => connectTLSlave(lm.controlNode, xBytes)) + + // TODO: this doesn't block other masters, e.g. 
RoCCs + tlOtherMastersNode := tile_master_blocker.map { _.node := tlMasterXbar.node } getOrElse { tlMasterXbar.node } + masterNode :=* tlOtherMastersNode + DisableMonitors { implicit p => tlSlaveXbar.node :*= slaveNode } + + nDCachePorts += 1 /*core */ + (dtim_adapter.isDefined).toInt + + val dtimProperty = dtim_adapter.map(d => Map( + "sifive,dtim" -> d.device.asProperty)).getOrElse(Nil) + + val itimProperty = frontend.icache.itimProperty.toSeq.flatMap(p => Map("sifive,itim" -> p)) + + val beuProperty = bus_error_unit.map(d => Map( + "sifive,buserror" -> d.device.asProperty)).getOrElse(Nil) + + val cpuDevice: SimpleDevice = new SimpleDevice("cpu", Seq("sifive,rocket0", "riscv")) { + override def parent = Some(ResourceAnchors.cpus) + override def describe(resources: ResourceBindings): Description = { + val Description(name, mapping) = super.describe(resources) + Description(name, mapping ++ cpuProperties ++ nextLevelCacheProperty + ++ tileProperties ++ dtimProperty ++ itimProperty ++ beuProperty) + } + } + + ResourceBinding { + Resource(cpuDevice, "reg").bind(ResourceAddress(tileId)) + } + + override lazy val module = new RocketTileModuleImp(this) + + override def makeMasterBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (rocketParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.none, BufferParams.flow, BufferParams.none, BufferParams.flow, BufferParams(1)) + case _ => TLBuffer(BufferParams.none) + } + + override def makeSlaveBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (rocketParams.boundaryBuffers, crossing) match { + case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() + case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.flow, BufferParams.none, BufferParams.none, BufferParams.none, 
BufferParams.none) + case _ => TLBuffer(BufferParams.none) + } +} + +class RocketTileModuleImp(outer: RocketTile) extends BaseTileModuleImp(outer) + with HasFpuOpt + with HasLazyRoCCModule + with HasICacheFrontendModule { + Annotated.params(this, outer.rocketParams) + + val core = Module(new Rocket(outer)(outer.p)) + + // reset vector is connected in the Frontend to s2_pc + core.io.reset_vector := DontCare + + // Report unrecoverable error conditions; for now the only cause is cache ECC errors + outer.reportHalt(List(outer.dcache.module.io.errors)) + + // Report when the tile has ceased to retire instructions; for now the only cause is clock gating + outer.reportCease(outer.rocketParams.core.clockGate.option( + !outer.dcache.module.io.cpu.clock_enabled && + !outer.frontend.module.io.cpu.clock_enabled && + !ptw.io.dpath.clock_enabled && + core.io.cease)) + + outer.reportWFI(Some(core.io.wfi)) + + outer.decodeCoreInterrupts(core.io.interrupts) // Decode the interrupt vector + + outer.bus_error_unit.foreach { beu => + core.io.interrupts.buserror.get := beu.module.io.interrupt + beu.module.io.errors.dcache := outer.dcache.module.io.errors + beu.module.io.errors.icache := outer.frontend.module.io.errors + } + + core.io.interrupts.nmi.foreach { nmi => nmi := outer.nmiSinkNode.get.bundle } + + // Pass through various external constants and reports that were bundle-bridged into the tile + outer.traceSourceNode.bundle <> core.io.trace + core.io.traceStall := outer.traceAuxSinkNode.bundle.stall + outer.bpwatchSourceNode.bundle <> core.io.bpwatch + core.io.hartid := outer.hartIdSinkNode.bundle + require(core.io.hartid.getWidth >= outer.hartIdSinkNode.bundle.getWidth, + s"core hartid wire (${core.io.hartid.getWidth}b) truncates external hartid wire (${outer.hartIdSinkNode.bundle.getWidth}b)") + + // Connect the core pipeline to other intra-tile modules + outer.frontend.module.io.cpu <> core.io.imem + dcachePorts += core.io.dmem // TODO outer.dcachePorts += () => 
module.core.io.dmem ?? + fpuOpt foreach { fpu => + core.io.fpu :<>= fpu.io.waiveAs[FPUCoreIO](_.cp_req, _.cp_resp) + fpu.io.cp_req := DontCare + fpu.io.cp_resp := DontCare + } + if (fpuOpt.isEmpty) { + core.io.fpu := DontCare + } + core.io.ptw <> ptw.io.dpath + + // Connect the coprocessor interfaces + if (outer.roccs.size > 0) { + cmdRouter.get.io.in <> core.io.rocc.cmd + outer.roccs.foreach{ lm => + lm.module.io.exception := core.io.rocc.exception + lm.module.io.fpu_req.ready := DontCare + lm.module.io.fpu_resp.valid := DontCare + lm.module.io.fpu_resp.bits.data := DontCare + lm.module.io.fpu_resp.bits.exc := DontCare + } + core.io.rocc.resp <> respArb.get.io.out + core.io.rocc.busy <> (cmdRouter.get.io.busy || outer.roccs.map(_.module.io.busy).reduce(_ || _)) + core.io.rocc.interrupt := outer.roccs.map(_.module.io.interrupt).reduce(_ || _) + (core.io.rocc.csrs zip roccCSRIOs.flatten).foreach { t => t._2 <> t._1 } + } else { + // tie off + core.io.rocc.cmd.ready := false.B + core.io.rocc.resp.valid := false.B + core.io.rocc.resp.bits := DontCare + core.io.rocc.busy := DontCare + core.io.rocc.interrupt := DontCare + } + // Dont care mem since not all RoCC need accessing memory + core.io.rocc.mem := DontCare + + // Rocket has higher priority to DTIM than other TileLink clients + outer.dtim_adapter.foreach { lm => dcachePorts += lm.module.io.dmem } + + // TODO eliminate this redundancy + val h = dcachePorts.size + val c = core.dcacheArbPorts + val o = outer.nDCachePorts + require(h == c, s"port list size was $h, core expected $c") + require(h == o, s"port list size was $h, outer counted $o") + // TODO figure out how to move the below into their respective mix-ins + dcacheArb.io.requestor <> dcachePorts.toSeq + ptw.io.requestor <> ptwPorts.toSeq +} + +trait HasFpuOpt { this: RocketTileModuleImp => + val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) +} From 8dd8988288bec435cdcd435e0db145787d323a0e Mon Sep 17 00:00:00 2001 From: 
Jiuyang Liu Date: Thu, 4 Jul 2024 15:53:09 +0800 Subject: [PATCH 080/140] [rocketv] implementing RocketTile --- rocketv/src/RocketTile.scala | 651 ++++++++++++++++++++++++----------- 1 file changed, 455 insertions(+), 196 deletions(-) diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala index 9d4c460d6..8f6938d50 100644 --- a/rocketv/src/RocketTile.scala +++ b/rocketv/src/RocketTile.scala @@ -1,217 +1,476 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. - -package freechips.rocketchip.tile +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.rocketv import chisel3._ -import org.chipsalliance.cde.config._ -import freechips.rocketchip.devices.tilelink._ -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.interrupts._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.rocket._ -import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike -import freechips.rocketchip.util._ -import freechips.rocketchip.prci.{ClockSinkParameters} - -case class RocketTileBoundaryBufferParams(force: Boolean = false) - -case class RocketTileParams( - core: RocketCoreParams = RocketCoreParams(), - icache: Option[ICacheParams] = Some(ICacheParams()), - dcache: Option[DCacheParams] = Some(DCacheParams()), - btb: Option[BTBParams] = Some(BTBParams()), - dataScratchpadBytes: Int = 0, - tileId: Int = 0, - beuAddr: Option[BigInt] = None, - blockerCtrlAddr: Option[BigInt] = None, - clockSinkParams: ClockSinkParameters = ClockSinkParameters(), - boundaryBuffers: Option[RocketTileBoundaryBufferParams] = None - ) extends InstantiableTileParams[RocketTile] { - require(icache.isDefined) - require(dcache.isDefined) - val baseName = "rockettile" - val uniqueName = s"${baseName}_$tileId" - def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters): RocketTile = { - new 
RocketTile(this, crossing, lookup) - } +import chisel3.experimental.hierarchy.{Instance, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.util.experimental.BitSet +import chisel3.util.log2Ceil +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} +import org.chipsalliance.rvdecoderdb.Instruction + +object RocketTileParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit def rwP: upickle.default.ReadWriter[RocketTileParameter] = upickle.default.macroRW[RocketTileParameter] } -class RocketTile private( - val rocketParams: RocketTileParams, - crossing: ClockCrossingType, - lookup: LookupByHartIdImpl, - q: Parameters) - extends BaseTile(rocketParams, crossing, lookup, q) - with SinksExternalInterrupts - with SourcesExternalNotifications - with HasLazyRoCC // implies CanHaveSharedFPU with CanHavePTW with HasHellaCache - with HasHellaCache - with HasICacheFrontend -{ - // Private constructor ensures altered LazyModule.p is used implicitly - def this(params: RocketTileParams, crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(implicit p: Parameters) = - this(params, crossing.crossingType, lookup, p) - - val intOutwardNode = rocketParams.beuAddr map { _ => IntIdentityNode() } - val slaveNode = TLIdentityNode() - val masterNode = visibilityNode - - val dtim_adapter = tileParams.dcache.flatMap { d => d.scratch.map { s => - LazyModule(new ScratchpadSlavePort(AddressSet.misaligned(s, d.dataScratchpadBytes), lazyCoreParamsView.coreDataBytes, tileParams.core.useAtomics && !tileParams.core.useAtomicsOnlyForIO)) - }} - dtim_adapter.foreach(lm => connectTLSlave(lm.node, lm.node.portParams.head.beatBytes)) - - val bus_error_unit = 
rocketParams.beuAddr map { a => - val beu = LazyModule(new BusErrorUnit(new L1BusErrors, BusErrorUnitParams(a))) - intOutwardNode.get := beu.intNode - connectTLSlave(beu.node, xBytes) - beu +/** + * Core: + * isa: parse from isa string + * vlen: parse from isa string, e.g. rv32imfd_zvl64b_zve32f + * priv: m|s|u + * + * Memory: + * AXI width + * PMA config + * + * uarch: + * - clockGate: sync + * - hartIdLen: log2 hart size, 1 + * - fenceIFlushDCache: flush DCache on fence.i: true + * - nPMPs: pmp region size, 8 + * - asidBits: ASID length, 0 + * - nBreakpoints: todo, 0 + * - useBPWatch: todo, false + * - mcontextWidth: todo, 0 + * - scontextWidth: todo, 0 + * - hasBeu: has bus error unit, false + * + * - fastLoadByte: todo, true + * - fastLoadWord: todo, false + * - if (fastLoadByte) io.dmem.resp.bits.data(xLen-1, 0) + * - else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass(xLen-1, 0) + * - else wb_reg_wdata + * + * - mulDivLatency: + * - divUnroll: + * - divEarlyOut: + * - divEarlyOutGranularity: + * - mulUnroll: + * - mulEarlyOut: + * + * - itlbNSets: ??? + * - itlbNWays: ??? + * - itlbNSectors: ??? + * - itlbNSuperpageEntries: ??? + * + * - usingBTB: + * - btbEntries: 28 + * - btbNMatchBits: 14 + * - btbUpdatesOutOfOrder: false + * - nPages: 6 + * - nRAS: 6 + * - usingBHT: + * - nEntries: 512 + * - counterLength: 1 + * - historyLength: 8 + * - historyBits: 3 + * + * - icache/dcache size: 16K, 32K + * - cacheBlockBytes: 32 + * - cache way: 4 + * - cache banksize: 32 + * - iCachePrefetch: false, todo, AXI Hint. 
+ */ +case class RocketTileParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + priv: String, + hartIdLen: Int, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + resetVectorBits: Int, + nBreakpoints: Int, + dtlbNWays: Int, + dtlbNSets: Int, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + paddrBits: Int, + cacheBlockBytes: Int, + nPMPs: Int, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + bhtParameter: Option[BHTParameter], + mulDivLatency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + flushOnFenceI: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean) + extends SerializableModuleParameter { + + // calculate + def usingUser: Boolean = priv.contains("u") + + def usingSupervisor: Boolean = priv.contains("s") + + def vLen: Option[Int] = instructionSets.collectFirst { + case s"zvl${vlen}b" => vlen.toInt } - val tile_master_blocker = - tileParams.blockerCtrlAddr - .map(BasicBusBlockerParams(_, xBytes, masterPortBeatBytes, deadlock = true)) - .map(bp => LazyModule(new BasicBusBlocker(bp))) - - tile_master_blocker.foreach(lm => connectTLSlave(lm.controlNode, xBytes)) - - // TODO: this doesn't block other masters, e.g. 
RoCCs - tlOtherMastersNode := tile_master_blocker.map { _.node := tlMasterXbar.node } getOrElse { tlMasterXbar.node } - masterNode :=* tlOtherMastersNode - DisableMonitors { implicit p => tlSlaveXbar.node :*= slaveNode } - - nDCachePorts += 1 /*core */ + (dtim_adapter.isDefined).toInt - - val dtimProperty = dtim_adapter.map(d => Map( - "sifive,dtim" -> d.device.asProperty)).getOrElse(Nil) - - val itimProperty = frontend.icache.itimProperty.toSeq.flatMap(p => Map("sifive,itim" -> p)) - - val beuProperty = bus_error_unit.map(d => Map( - "sifive,buserror" -> d.device.asProperty)).getOrElse(Nil) - - val cpuDevice: SimpleDevice = new SimpleDevice("cpu", Seq("sifive,rocket0", "riscv")) { - override def parent = Some(ResourceAnchors.cpus) - override def describe(resources: ResourceBindings): Description = { - val Description(name, mapping) = super.describe(resources) - Description(name, mapping ++ cpuProperties ++ nextLevelCacheProperty - ++ tileProperties ++ dtimProperty ++ itimProperty ++ beuProperty) + // static for now + def hasBeu: Boolean = false + def usingNMI: Boolean = false + def usingHypervisor: Boolean = false + def usingDataScratchpad: Boolean = false + def nLocalInterrupts: Int = 0 + def dcacheArbPorts: Int = 2 + def tagECC: Option[String] = None + def dataECC: Option[String] = None + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // Four mandatory instruction sets. 
+ Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + def usingBTB: Boolean = btbEntries > 0 + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) } - } - - ResourceBinding { - Resource(cpuDevice, "reg").bind(ResourceAddress(tileId)) - } - override lazy val module = new RocketTileModuleImp(this) + def usingVM = hasInstructionSet("sfence.vma") - override def makeMasterBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (rocketParams.boundaryBuffers, crossing) match { - case (Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.none, BufferParams.flow, BufferParams.none, BufferParams.flow, BufferParams(1)) - case _ => TLBuffer(BufferParams.none) + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 } - override def makeSlaveBoundaryBuffers(crossing: ClockCrossingType)(implicit p: Parameters) = (rocketParams.boundaryBuffers, crossing) match { - case 
(Some(RocketTileBoundaryBufferParams(true )), _) => TLBuffer() - case (Some(RocketTileBoundaryBufferParams(false)), _: RationalCrossing) => TLBuffer(BufferParams.flow, BufferParams.none, BufferParams.none, BufferParams.none, BufferParams.none) - case _ => TLBuffer(BufferParams.none) + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingCompressed = hasInstructionSet("rv_c") + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def rocketParameter: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets, + vLen.getOrElse(0), + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI + ) + + def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNWays: Int, + dcacheNSets: Int, + dcacheRowBits: Int, + dtlbNSets: Int, + dtlbNWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) + + def ptwParameter: PTWParameter = PTWParameter( + useAsyncReset: Boolean, + 
clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int + ) + + def frontendParameter: FrontendParameter = FrontendParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + usingAtomics: Boolean, + usingDataScratchpad: Boolean, + usingVM: Boolean, + usingCompressed: Boolean, + usingBTB: Boolean, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + cacheBlockBytes: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + nPMPs: Int, + paddrBits: Int, + pgLevels: Int, + asidBits: Int, + bhtParameter: Option[BHTParameter], + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map { + case (fLen, minFLen) => + FPUParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + hartIdLen: Int + ) } -} - -class RocketTileModuleImp(outer: RocketTile) extends BaseTileModuleImp(outer) - with HasFpuOpt - with HasLazyRoCCModule - with HasICacheFrontendModule { - Annotated.params(this, outer.rocketParams) - val core = Module(new Rocket(outer)(outer.p)) + def instructionFetchParameter: AXI4BundleParameter = frontendParameter.instructionFetchParameter - // reset vector is connected in the Frontend to s2_pc - core.io.reset_vector := DontCare + def itimParameter: Option[AXI4BundleParameter] = frontendParameter.itimParameter - // Report unrecoverable error conditions; for now the only cause is cache ECC errors - 
outer.reportHalt(List(outer.dcache.module.io.errors)) + def loadStoreParameter: AXI4BundleParameter = hellaCacheParameter.loadStoreParameter - // Report when the tile has ceased to retire instructions; for now the only cause is clock gating - outer.reportCease(outer.rocketParams.core.clockGate.option( - !outer.dcache.module.io.cpu.clock_enabled && - !outer.frontend.module.io.cpu.clock_enabled && - !ptw.io.dpath.clock_enabled && - core.io.cease)) - - outer.reportWFI(Some(core.io.wfi)) - - outer.decodeCoreInterrupts(core.io.interrupts) // Decode the interrupt vector + def dtimParameter: Option[AXI4BundleParameter] = hellaCacheParameter.dtimParameter +} - outer.bus_error_unit.foreach { beu => - core.io.interrupts.buserror.get := beu.module.io.interrupt - beu.module.io.errors.dcache := outer.dcache.module.io.errors - beu.module.io.errors.icache := outer.frontend.module.io.errors - } +class RocketTileInterface(parameter: RocketTileParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + // todo: Const + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + + val debug: Bool = Input(Bool()) + val mtip: Bool = Input(Bool()) + val msip: Bool = Input(Bool()) + val meip: Bool = Input(Bool()) + val seip: Option[Bool] = Option.when(parameter.usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(parameter.nLocalInterrupts, Bool()) + val nmi = Option.when(parameter.usingNMI)(Bool()) + val nmiInterruptVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + val nmiIxceptionVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + // TODO: buserror should be handled by NMI + val buserror: Bool = Input(Bool()) + val wfi: Bool = Output(Bool()) + val halt: Bool = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = + 
org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val loadStoreAXI: AXI4RWIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = + parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) +} - core.io.interrupts.nmi.foreach { nmi => nmi := outer.nmiSinkNode.get.bundle } - - // Pass through various external constants and reports that were bundle-bridged into the tile - outer.traceSourceNode.bundle <> core.io.trace - core.io.traceStall := outer.traceAuxSinkNode.bundle.stall - outer.bpwatchSourceNode.bundle <> core.io.bpwatch - core.io.hartid := outer.hartIdSinkNode.bundle - require(core.io.hartid.getWidth >= outer.hartIdSinkNode.bundle.getWidth, - s"core hartid wire (${core.io.hartid.getWidth}b) truncates external hartid wire (${outer.hartIdSinkNode.bundle.getWidth}b)") - - // Connect the core pipeline to other intra-tile modules - outer.frontend.module.io.cpu <> core.io.imem - dcachePorts += core.io.dmem // TODO outer.dcachePorts += () => module.core.io.dmem ?? 
- fpuOpt foreach { fpu => - core.io.fpu :<>= fpu.io.waiveAs[FPUCoreIO](_.cp_req, _.cp_resp) - fpu.io.cp_req := DontCare - fpu.io.cp_resp := DontCare - } - if (fpuOpt.isEmpty) { - core.io.fpu := DontCare +class RocketTile(val parameter: RocketTileParameter) + extends FixedIORawModule(new RocketTileInterface(parameter)) + with SerializableModule[RocketTileParameter] { + val rocket: Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter)) + val frontend: Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter)) + val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter)) + val hellaCacheArbiter: Instance[HellaCacheArbiter] = Instantiate( + new HellaCacheArbiter(parameter.hellaCacheArbiterParameter) + ) + val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter)) + val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(fpuParameter => Instantiate(new FPU(fpuParameter))) + + rocket.io.clock := io.clock + rocket.io.reset := io.reset + rocket.io.hartid := io.hartid + rocket.io.interrupts.debug := io.debug + rocket.io.interrupts.mtip := io.mtip + rocket.io.interrupts.msip := io.msip + rocket.io.interrupts.meip := io.meip + rocket.io.interrupts.seip.foreach(_ := io.seip.get) + rocket.io.interrupts.lip := io.lip + rocket.io.interrupts.nmi.foreach { nmi => + nmi.rnmi := io.nmi.get + nmi.rnmi_interrupt_vector := io.nmiInterruptVector.get + nmi.rnmi_exception_vector := io.nmiIxceptionVector.get } - core.io.ptw <> ptw.io.dpath - - // Connect the coprocessor interfaces - if (outer.roccs.size > 0) { - cmdRouter.get.io.in <> core.io.rocc.cmd - outer.roccs.foreach{ lm => - lm.module.io.exception := core.io.rocc.exception - lm.module.io.fpu_req.ready := DontCare - lm.module.io.fpu_resp.valid := DontCare - lm.module.io.fpu_resp.bits.data := DontCare - lm.module.io.fpu_resp.bits.exc := DontCare - } - core.io.rocc.resp <> respArb.get.io.out - core.io.rocc.busy <> (cmdRouter.get.io.busy || 
outer.roccs.map(_.module.io.busy).reduce(_ || _)) - core.io.rocc.interrupt := outer.roccs.map(_.module.io.interrupt).reduce(_ || _) - (core.io.rocc.csrs zip roccCSRIOs.flatten).foreach { t => t._2 <> t._1 } - } else { - // tie off - core.io.rocc.cmd.ready := false.B - core.io.rocc.resp.valid := false.B - core.io.rocc.resp.bits := DontCare - core.io.rocc.busy := DontCare - core.io.rocc.interrupt := DontCare + // @todo make it optional + rocket.io.buserror := io.buserror + io.wfi := rocket.io.wfi + io.loadStoreAXI <> hellaCache.io.loadStoreAXI + io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (io, hellaCache) => io <> hellaCache } + io.instructionFetchAXI <> frontend.io.instructionFetchAXI + io.itimAXI.zip(frontend.io.itimAXI).foreach { case (io, frontend) => io <> frontend } + // design for halt and beu, only use the halt function for now. + io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable) + .flatMap(_.map(_.valid)) + .foldLeft(false.B)(_ || _) + + // rocket core io + rocket.io.imem <> frontend.io.nonDiplomatic.cpu + hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem + rocket.io.ptw <> ptw.io.dpath + rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (core, fpu) => core <> fpu } + // used by trace module + rocket.io.bpwatch := DontCare + // don't use for now, this is design for report the custom cease status. + // rocket.io.cease + // it will be used in the future w/ trace support. 
+ rocket.io.traceStall := false.B + + // frontend io + frontend.io.clock := io.clock + frontend.io.reset := io.reset + frontend.io.resetVector := io.resetVector + ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw + + // hellacache io + hellaCache.io.clock := io.clock + hellaCache.io.reset := io.reset + ptw.io.requestor(1) <> hellaCache.io.ptw + hellaCache.io.cpu <> hellaCacheArbiter.io.mem + + // ptw io + ptw.io.clock := io.clock + ptw.io.reset := io.reset + hellaCacheArbiter.io.requestor(1) <> ptw.io.mem + + // hellacache arbiter io + hellaCacheArbiter.io.clock := io.clock + hellaCacheArbiter.io.reset := io.reset + + fpu.foreach { fpu => + fpu.io.clock := io.clock + fpu.io.reset := io.reset + // @todo: remove it from FPU. + fpu.io.cp_req <> DontCare + fpu.io.cp_resp <> DontCare } - // Dont care mem since not all RoCC need accessing memory - core.io.rocc.mem := DontCare - - // Rocket has higher priority to DTIM than other TileLink clients - outer.dtim_adapter.foreach { lm => dcachePorts += lm.module.io.dmem } - - // TODO eliminate this redundancy - val h = dcachePorts.size - val c = core.dcacheArbPorts - val o = outer.nDCachePorts - require(h == c, s"port list size was $h, core expected $c") - require(h == o, s"port list size was $h, outer counted $o") - // TODO figure out how to move the below into their respective mix-ins - dcacheArb.io.requestor <> dcachePorts.toSeq - ptw.io.requestor <> ptwPorts.toSeq -} - -trait HasFpuOpt { this: RocketTileModuleImp => - val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p))) } From cbb57521fc239a6a0f3756b2224d5b3e7b16f88c Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Thu, 4 Jul 2024 15:54:17 +0800 Subject: [PATCH 081/140] [rocketv] add elaborator for RocketTile - generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RocketTile config --useAsyncReset false --clockGate true --instructionSets rv32_i --priv m --hartIdLen 4 --useBPWatch false --mcontextWidth 0 
--scontextWidth 0 --asidBits 0 --resetVectorBits 32 --nBreakpoints 0 --dtlbNWays 0 --dtlbNSets 0 --itlbNSets 0 --itlbNWays 0 --itlbNSectors 0 --itlbNSuperpageEntries 0 --nPTECacheEntries 0 --nL2TLBWays 0 --nL2TLBEntries 0 --paddrBits 32 --cacheBlockBytes 32 --nPMPs 8 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --logic 0 --arithmetic 0 --exec 80000000-ffffffff --sideEffects 00000000-3fffffff --btbEntries 28 --btbNMatchBits 14 --btbUpdatesOutOfOrder false --nPages 6 --nRAS 6 --bhtNEntries 512 --bhtCounterLength 1 --bhtHistoryLength 8 --bhtHistoryBits 3 --mulDivLatency 2 --divUnroll 1 --divEarlyOut false --divEarlyOutGranularity 0 --mulUnroll 1 --mulEarlyOut false --sfmaLatency 3 --dfmaLatency 3 --divSqrt true --flushOnFenceI true --fastLoadByte false --fastLoadWord false --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --maxUncachedInFlight 1 --separateUncachedResp false --iCacheNSets 64 --iCacheNWays 4 --iCachePrefetch false - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RocketTile design --parameter ./RocketTile.json --run-firtool --- elaborator/src/rocketv/RocketTile.scala | 191 ++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 elaborator/src/rocketv/RocketTile.scala diff --git a/elaborator/src/rocketv/RocketTile.scala b/elaborator/src/rocketv/RocketTile.scala new file mode 100644 index 000000000..753d026a9 --- /dev/null +++ b/elaborator/src/rocketv/RocketTile.scala @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.rocketv.{BHTParameter, RocketTile, RocketTileParameter} +import org.chipsalliance.t1.elaborator.Elaborator + +// --useAsyncReset true --clockGate true --instructionSets rv32_i 
--priv m --hartIdLen 4 --useBPWatch false --mcontextWidth 0 --scontextWidth 0 --asidBits 0 --resetVectorBits 32 --nBreakpoints 0 --dtlbNWays 0 --dtlbNSets 0 --itlbNSets 0 --itlbNWays 0 --itlbNSectors 0 --itlbNSuperpageEntries 0 --nPTECacheEntries 0 --nL2TLBWays 0 --nL2TLBEntries 0 --paddrBits 32 --cacheBlockBytes 32 --nPMPs 8 --legal 00000000-ffffffff --cacheable 80000000-ffffffff --read 00000000-ffffffff --write 00000000-ffffffff --putPartial 00000000-ffffffff --logic 0 --arithmetic 0 --exec 80000000-ffffffff --sideEffects 00000000-3fffffff --btbEntries 28 --btbNMatchBits 14 --btbUpdatesOutOfOrder false --nPages 6 --nRAS 6 --bhtNEntries 512 --bhtCounterLength 1 --bhtHistoryLength 8 --bhtHistoryBits 3 --mulDivLatency 2 --divUnroll 1 --divEarlyOut false --divEarlyOutGranularity 0 --mulUnroll 1 --mulEarlyOut false --sfmaLatency 3 --dfmaLatency 3 --divSqrt true --flushOnFenceI true --fastLoadByte false --fastLoadWord false --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --maxUncachedInFlight 1 --separateUncachedResp false --iCacheNSets 64 --iCacheNWays 4 --iCachePrefetch false + +object RocketTile extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + @main + case class RocketTileParameterMain( + @arg(name = "useAsyncReset") useAsyncReset: Boolean, + @arg(name = "clockGate") clockGate: Boolean, + @arg(name = 
"instructionSets") instructionSets: Set[String], + @arg(name = "priv") priv: String, + @arg(name = "hartIdLen") hartIdLen: Int, + @arg(name = "useBPWatch") useBPWatch: Boolean, + @arg(name = "mcontextWidth") mcontextWidth: Int, + @arg(name = "scontextWidth") scontextWidth: Int, + @arg(name = "asidBits") asidBits: Int, + @arg(name = "resetVectorBits") resetVectorBits: Int, + @arg(name = "nBreakpoints") nBreakpoints: Int, + @arg(name = "dtlbNWays") dtlbNWays: Int, + @arg(name = "dtlbNSets") dtlbNSets: Int, + @arg(name = "itlbNSets") itlbNSets: Int, + @arg(name = "itlbNWays") itlbNWays: Int, + @arg(name = "itlbNSectors") itlbNSectors: Int, + @arg(name = "itlbNSuperpageEntries") itlbNSuperpageEntries: Int, + @arg(name = "nPTECacheEntries") nPTECacheEntries: Int, + @arg(name = "nL2TLBWays") nL2TLBWays: Int, + @arg(name = "nL2TLBEntries") nL2TLBEntries: Int, + @arg(name = "paddrBits") paddrBits: Int, + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "legal") legal: BitSet, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "read") read: BitSet, + @arg(name = "write") write: BitSet, + @arg(name = "putPartial") putPartial: BitSet, + @arg(name = "logic") logic: BitSet, + @arg(name = "arithmetic") arithmetic: BitSet, + @arg(name = "exec") exec: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet, + @arg(name = "btbEntries") btbEntries: Int, + @arg(name = "btbNMatchBits") btbNMatchBits: Int, + @arg(name = "btbUpdatesOutOfOrder") btbUpdatesOutOfOrder: Boolean, + @arg(name = "nPages") nPages: Int, + @arg(name = "nRAS") nRAS: Int, + @arg(name = "bhtNEntries") bhtNEntries: Option[Int], + @arg(name = "bhtCounterLength") bhtCounterLength: Option[Int], + @arg(name = "bhtHistoryLength") bhtHistoryLength: Option[Int], + @arg(name = "bhtHistoryBits") bhtHistoryBits: Option[Int], + @arg(name = "mulDivLatency") mulDivLatency: Int, + @arg(name = "divUnroll") divUnroll: Int, + @arg(name = "divEarlyOut") divEarlyOut: 
Boolean, + @arg(name = "divEarlyOutGranularity") divEarlyOutGranularity: Int, + @arg(name = "mulUnroll") mulUnroll: Int, + @arg(name = "mulEarlyOut") mulEarlyOut: Boolean, + @arg(name = "sfmaLatency") sfmaLatency: Int, + @arg(name = "dfmaLatency") dfmaLatency: Int, + @arg(name = "divSqrt") divSqrt: Boolean, + @arg(name = "flushOnFenceI") flushOnFenceI: Boolean, + @arg(name = "fastLoadByte") fastLoadByte: Boolean, + @arg(name = "fastLoadWord") fastLoadWord: Boolean, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "dcacheNWays") dcacheNWays: Int, + @arg(name = "dcacheRowBits") dcacheRowBits: Int, + @arg(name = "maxUncachedInFlight") maxUncachedInFlight: Int, + @arg(name = "separateUncachedResp") separateUncachedResp: Boolean, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean) { + def convert: RocketTileParameter = RocketTileParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + instructionSets: Set[String], + priv: String, + hartIdLen: Int, + useBPWatch: Boolean, + mcontextWidth: Int, + scontextWidth: Int, + asidBits: Int, + resetVectorBits: Int, + nBreakpoints: Int, + dtlbNWays: Int, + dtlbNSets: Int, + itlbNSets: Int, + itlbNWays: Int, + itlbNSectors: Int, + itlbNSuperpageEntries: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + paddrBits: Int, + cacheBlockBytes: Int, + nPMPs: Int, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet, + btbEntries: Int, + btbNMatchBits: Int, + btbUpdatesOutOfOrder: Boolean, + nPages: Int, + nRAS: Int, + bhtNEntries + .lazyZip(bhtCounterLength) + .lazyZip(bhtHistoryLength) + .lazyZip(bhtHistoryBits) + .map { + case ( + bhtNEntries, + bhtCounterLength, + bhtHistoryLength, + bhtHistoryBits + ) => + BHTParameter(bhtNEntries, bhtCounterLength, bhtHistoryLength, bhtHistoryBits) + } 
+ .headOption: Option[BHTParameter], + mulDivLatency: Int, + divUnroll: Int, + divEarlyOut: Boolean, + divEarlyOutGranularity: Int, + mulUnroll: Int, + mulEarlyOut: Boolean, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + flushOnFenceI: Boolean, + fastLoadByte: Boolean, + fastLoadWord: Boolean, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean + ) + } + + implicit def RocketTileParameterMainParser: ParserForClass[RocketTileParameterMain] = + ParserForClass[RocketTileParameterMain] + + @main + def config(@arg(name = "parameter") parameter: RocketTileParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[RocketTile, RocketTileParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} From 68fe9a87ac3f6ffbb22181de1918ba8ef300d5b8 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Tue, 6 Aug 2024 18:14:36 +0800 Subject: [PATCH 082/140] [rocketv] add t1 specific logic --- elaborator/src/rocketv/Rocket.scala | 6 +- rocketv/src/Bundle.scala | 51 +++++++++++++++- rocketv/src/CSR.scala | 11 ++++ rocketv/src/RocketCore.scala | 92 +++++++++++++++++++++++++++-- rocketv/src/RocketTile.scala | 3 +- 5 files changed, 155 insertions(+), 8 deletions(-) diff --git a/elaborator/src/rocketv/Rocket.scala b/elaborator/src/rocketv/Rocket.scala index 99f90d17a..f5954500e 100644 --- a/elaborator/src/rocketv/Rocket.scala +++ b/elaborator/src/rocketv/Rocket.scala @@ -34,7 +34,8 @@ object Rocket extends Elaborator { @arg(name = "fastLoadByte") fastLoadByte: Boolean, @arg(name = "fastLoadWord") fastLoadWord: Boolean, @arg(name = "dcacheNSets") dcacheNSets: Int, - @arg(name = "flushOnFenceI") flushOnFenceI: Boolean) { + @arg(name = "flushOnFenceI") 
flushOnFenceI: Boolean, + @arg(name = "usingT1") usingT1: Boolean) { def convert: RocketParameter = RocketParameter( useAsyncReset, clockGate, @@ -61,7 +62,8 @@ object Rocket extends Elaborator { fastLoadByte, fastLoadWord, dcacheNSets, - flushOnFenceI + flushOnFenceI, + usingT1 ) } diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala index f6da32b86..31bc2ba16 100644 --- a/rocketv/src/Bundle.scala +++ b/rocketv/src/Bundle.scala @@ -5,7 +5,7 @@ package org.chipsalliance.rocketv import chisel3._ -import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, isPow2, log2Ceil} +import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, ValidIO, isPow2, log2Ceil} // This file defines Bundle shared in the project. // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle. @@ -493,6 +493,14 @@ class DCSR extends Bundle { val prv = UInt(PRV.SZ.W) } +class VCSR extends Bundle { + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) +} + + class MIP(nLocalInterrupts: Int) extends Bundle { val lip = Vec(nLocalInterrupts, Bool()) val zero1 = Bool() @@ -1399,3 +1407,44 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits) val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits) } + +// Interface between T1 <> Rocket integration +class RocketCoreToT1(xLen: Int, vlWidth: Int) extends Bundle { + val issue: DecoupledIO[T1Issue] = Decoupled(new T1Issue(xLen, vlWidth)) + val retire: T1Retire = Flipped(new T1Retire(xLen)) +} + +class T1Issue(xLen: Int, vlWidth: Int) extends Bundle { + val instruction: UInt = UInt(32.W) + val rs1Data: UInt = UInt(xLen.W) + val rs2Data: UInt = UInt(xLen.W) + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + val vstart: UInt = UInt(32.W) + val vcsr: UInt = UInt(32.W) +} + 
+object T1Issue { + def vlmul(issue: T1Issue): UInt = issue.vtype(2, 0) + def vsew(issue: T1Issue): UInt = issue.vtype(5, 3) + def vta(issue: T1Issue): Bool = issue.vtype(6) + def vma(issue: T1Issue): Bool = issue.vtype(7) + def vxrm(issue: T1Issue): UInt = issue.vcsr(2, 1) +} + +class T1RdRetire(xLen: Int) extends Bundle { + val rdAddress: UInt = UInt(5.W) + val rdData: UInt = UInt(xLen.W) + val isFp: Bool = Bool() +} + +class T1CSRRetire extends Bundle { + val vxsat: UInt = UInt(32.W) + val fflag: UInt = UInt(32.W) +} + +class T1Retire(xLen: Int) extends Bundle { + val rd: Valid[T1RdRetire] = Valid(new T1RdRetire(xLen)) + val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire) + val mem: Valid[Bundle] = Valid(new Bundle {}) +} \ No newline at end of file diff --git a/rocketv/src/CSR.scala b/rocketv/src/CSR.scala index 266a339a5..aa72c4fda 100644 --- a/rocketv/src/CSR.scala +++ b/rocketv/src/CSR.scala @@ -279,6 +279,7 @@ class CSRInterface(parameter: CSRParameter) extends Bundle { val fiom = Output(Bool()) val vectorCsr = Option.when(parameter.usingVector)(Input(Bool())) val wbRegRS2 = Option.when(parameter.usingVector)(Input(UInt(parameter.xLen.W))) + val csrToVector = Option.when(parameter.usingVector)(Output(new VCSR)) // @todo custom CSR val customCSRs = Vec(parameter.customCSRSize, new CustomCSRIO(parameter.xLen)) } @@ -1678,6 +1679,16 @@ class CSR(val parameter: CSRParameter) // update csr for vector if (usingVector) { + // connect csr for vector + val vtype = vector.get.states("vill") ## 0.U(23.W) ## vector.get.states("vma") ## + vector.get.states("vta") ## vector.get.states("vsew") ## vector.get.states("vlmul") + val vcsr = vector.get.states("vxrm") ## vector.get.states("vxsat") + io.csrToVector.foreach {v => + v.vtype := vtype + v.vl := vector.get.states("vl") + v.vcsr := vcsr + v.vstart := vector.get.states("vstart") + } // set vl type val vsetvli = !io.inst(0)(31) val vsetivli = io.inst(0)(31, 30).andR diff --git a/rocketv/src/RocketCore.scala 
b/rocketv/src/RocketCore.scala index 1dce3e850..2efb762e3 100644 --- a/rocketv/src/RocketCore.scala +++ b/rocketv/src/RocketCore.scala @@ -9,7 +9,7 @@ import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.util.circt.ClockGate import chisel3.util.experimental.decode.DecodeBundle -import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil} +import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up} import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes import org.chipsalliance.rvdecoderdb.Instruction @@ -43,9 +43,12 @@ case class RocketParameter( fastLoadByte: Boolean, fastLoadWord: Boolean, dcacheNSets: Int, - flushOnFenceI: Boolean + flushOnFenceI: Boolean, + usingT1: Boolean ) extends SerializableModuleParameter { + // interface to T1 + def usingVector = hasInstructionSet("rv_v") // fixed for now def usingRVE = false @@ -135,8 +138,6 @@ case class RocketParameter( // static to false for now def usingNMI = hasInstructionSet("rv_smrnmi") - def usingVector = hasInstructionSet("rv_v") - // calculated parameter def fetchWidth: Int = if (usingCompressed) 2 else 1 @@ -311,6 +312,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle { ) ) val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen))) + val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1(parameter.xLen, parameter.vLen)) val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch)) val cease = Output(Bool()) val wfi = Output(Bool()) @@ -1201,6 +1203,7 @@ class Rocket(val parameter: RocketParameter) // 8. 
set FP scoreboard fpScoreboard.set(((wbDcacheMiss || Option.when(usingVector)(wbRegDecodeOutput(parameter.decoderParameter.vector)).getOrElse(false.B)) && wbRegDecodeOutput(parameter.decoderParameter.wfd) || fpu.sboard_set) && wbValid, wbWaddr) fpScoreboard.clear(dmemResponseReplay && dmemResponseFpu, dmemResponseWaddr) + io.t1.foreach(t1 => fpScoreboard.clear(t1.retire.rd.valid && t1.retire.rd.bits.isFp, t1.retire.rd.bits.rdAddress)) fpScoreboard.clear(fpu.sboard_clr, fpu.sboard_clra) checkHazards(fpHazardTargets, fpScoreboard.read) } @@ -1329,6 +1332,87 @@ class Rocket(val parameter: RocketParameter) fpu.keep_clock_enabled := false.B } + // TODO: T1 only logic + io.t1.foreach { t1 => + // T1 Issue + val maxCount: Int = 32 + val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount)) + t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector) + t1IssueQueue.io.enq.bits.instruction := wbRegInstruction + t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata + t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2 + t1IssueQueue.io.enq.bits.vtype := csr.io.csrToVector.get.vtype + t1IssueQueue.io.enq.bits.vl := csr.io.csrToVector.get.vl + t1IssueQueue.io.enq.bits.vstart := csr.io.csrToVector.get.vstart + t1IssueQueue.io.enq.bits.vcsr := csr.io.csrToVector.get.vcsr + t1.issue.valid := t1IssueQueue.io.deq.valid + t1.issue.bits := t1IssueQueue.io.deq.bits + t1IssueQueue.io.deq.ready := t1.issue.ready + // For each different retirements, it should maintain different scoreboard + val t1CSRRetireQueue: Queue[T1CSRRetire] = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount)) + val t1XRDRetireQueue: Queue[T1RdRetire] = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount)) + + val countWidth = log2Up(maxCount) + def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { + val counter: UInt = RegInit(0.U(size.W)) + val nextCount = counter + Mux(grant, 
1.U(size.W), (-1.S(size.W)).asUInt) + val updateCounter = grant ^ release + when(updateCounter) { + counter := nextCount + } + flush.foreach(f => when(f)(counter := 0.U)) + val empty = (updateCounter && nextCount === 0.U) || counter === 0.U + val fullCounter: Int = (1 << size) - 1 - margin + val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U + (empty, full) + } + // T1 Memory Scoreboard + val t1MemoryGrant: Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU) + val t1MemoryRelease: Bool = t1.retire.mem.fire + // todo: handle vector lsu in pipe + // +1: There are instructions that will enter t1 + val (lsuEmpty, _) = counterManagement(countWidth + 1)(t1MemoryGrant, t1MemoryRelease) + // T1 CSR Scoreboard + // todo: add wbRegDecodeOutput(vectorWriteCsr) + val t1CSRGrant: Bool = false.B + val t1CSRRelease: Bool = false.B // t1CSRRetireQueue.io.deq.fire + val (t1CSREmpty, _) = counterManagement(countWidth + 1)(t1CSRGrant, t1CSRRelease) + // T1 XRD Scoreboard? 
+ + // Maintain vector counter + // There may be 4 instructions in the pipe + val (_, vectorFull) = counterManagement(countWidth, 4)(t1IssueQueue.io.enq.valid, t1.issue.fire) + vectorLSUEmpty.foreach(_ := lsuEmpty) + vectorQueueFull.foreach(_ := vectorFull) + + t1XRDRetireQueue.io.enq.valid := t1.retire.rd.valid + t1XRDRetireQueue.io.enq.bits := t1.retire.rd.bits + t1CSRRetireQueue.io.enq.valid := t1.retire.csr.valid + t1CSRRetireQueue.io.enq.bits := t1.retire.csr.bits + // todo: write csr here + t1CSRRetireQueue.io.deq.ready := true.B + + val vectorTryToWriteRd = t1XRDRetireQueue.io.deq.valid && !t1XRDRetireQueue.io.deq.bits.isFp + val vectorTryToWriteFP = t1XRDRetireQueue.io.deq.valid && t1XRDRetireQueue.io.deq.bits.isFp + t1XRDRetireQueue.io.deq.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP) + + when(t1.retire.rd.fire && vectorTryToWriteRd) { + longlatencyWdata := t1.retire.rd.bits.rdData + longlatencyWaddress := t1.retire.rd.bits.rdAddress + longLatencyWenable := true.B + } + io.fpu.foreach { fpu => + when(!(dmemResponseValid && dmemResponseFpu)) { + fpu.dmem_resp_val := t1.retire.mem.fire && vectorTryToWriteFP + fpu.dmem_resp_data := t1.retire.rd.bits.rdData + // todo: 32 bit only + fpu.dmem_resp_type := 2.U + // todo: connect tag + fpu.dmem_resp_tag := 0.U + } + } + } + io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem) val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B)) // require(coreParams.dcacheReqTagBits >= ex_dcache_tag.getWidth) diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala index 8f6938d50..fd369482c 100644 --- a/rocketv/src/RocketTile.scala +++ b/rocketv/src/RocketTile.scala @@ -243,7 +243,8 @@ case class RocketTileParameter( fastLoadByte, fastLoadWord, dcacheNSets, - flushOnFenceI + flushOnFenceI, + usingT1 = 
false ) def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( From a8406aa62d24a7f13c78371cfc0fed6fbccbcb40 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 7 Jul 2024 16:11:04 +0800 Subject: [PATCH 083/140] [nix] add rocketv derivation Add a derivation to get RocketTile.{anno.json,fir,sv}. This derivation will read from a pre-generated RocketTile.json for the elaborator. Signed-off-by: Avimitin --- nix/t1/default.nix | 2 + nix/t1/rocketv.nix | 85 +++++++++++++++++++++++++++++++++ rocketv/configs/RocketTile.json | 1 + 3 files changed, 88 insertions(+) create mode 100644 nix/t1/rocketv.nix create mode 100644 rocketv/configs/RocketTile.json diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 088cd98ae..9d1153733 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -35,6 +35,8 @@ lib.makeScope newScope configgen = _millOutput.configgen // { meta.mainProgram = "configgen"; }; t1package = _millOutput.t1package; + rocketv = self.callPackage ./rocketv.nix { }; + omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/rocketv.nix b/nix/t1/rocketv.nix new file mode 100644 index 000000000..c7ee3cce3 --- /dev/null +++ b/nix/t1/rocketv.nix @@ -0,0 +1,85 @@ +{ lib +, stdenv +, fetchMillDeps +, jdk21 + + # chisel deps +, mill +, espresso +, circt-full +, jextract-21 +, add-determinism + +, submodules +}: + +let + self = stdenv.mkDerivation rec { + name = "t1-rocketv"; + + src = with lib.fileset; toSource { + root = ./../..; + fileset = unions [ + ./../../build.sc + ./../../common.sc + ./../../ipemu + ./../../subsystem + ./../../emuhelper + ./../../t1 + ./../../rocket + ./../../rocketv + ./../../elaborator + ]; + }; + + passthru.millDeps = fetchMillDeps { + inherit name; + src = with lib.fileset; toSource { + root = ./../..; + fileset = unions [ + ./../../build.sc + ./../../common.sc + ]; + }; + millDepsHash = "sha256-ZwIl6YsaGde3ikbzxLzY2+/XTc5O2dQrOMKcwhKEq+k="; + 
nativeBuildInputs = [ submodules.setupHook ]; + }; + + passthru.editable = self.overrideAttrs (_: { + shellHook = '' + setupSubmodulesEditable + mill mill.bsp.BSP/install 0 + ''; + }); + + shellHook = '' + setupSubmodules + ''; + + nativeBuildInputs = [ + mill + circt-full + jextract-21 + add-determinism + espresso + + passthru.millDeps.setupHook + + submodules.setupHook + ]; + + env.CIRCT_INSTALL_PATH = circt-full; + + buildPhase = '' + mill -i elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RocketTile design \ + --parameter ./rocketv/configs/RocketTile.json --run-firtool + ''; + + installPhase = '' + mkdir -p $out/share/rocketv + + mv RocketTile.{sv,anno.json,fir} $out/share/rocketv/ + ''; + }; +in +self diff --git a/rocketv/configs/RocketTile.json b/rocketv/configs/RocketTile.json new file mode 100644 index 000000000..1ffc5bbd1 --- /dev/null +++ b/rocketv/configs/RocketTile.json @@ -0,0 +1 @@ +{"useAsyncReset":false,"clockGate":true,"instructionSets":["rv32_i"],"priv":"m","hartIdLen":4,"useBPWatch":false,"mcontextWidth":0,"scontextWidth":0,"asidBits":0,"resetVectorBits":32,"nBreakpoints":0,"dtlbNWays":32,"dtlbNSets":64,"itlbNSets":64,"itlbNWays":32,"itlbNSectors":4,"itlbNSuperpageEntries":4,"nPTECacheEntries":0,"nL2TLBWays":1,"nL2TLBEntries":0,"paddrBits":32,"cacheBlockBytes":32,"nPMPs":8,"legal":"b????????????????????????????????","cacheable":"b1???????????????????????????????","read":"b????????????????????????????????","write":"b????????????????????????????????","putPartial":"b????????????????????????????????","logic":"b0","arithmetic":"b0","exec":"b1???????????????????????????????","sideEffects":"b00??????????????????????????????","btbEntries":28,"btbNMatchBits":14,"btbUpdatesOutOfOrder":false,"nPages":6,"nRAS":6,"bhtParameter":[{"nEntries":512,"counterLength":1,"historyLength":8,"historyBits":3}],"mulDivLatency":2,"divUnroll":1,"divEarlyOut":false,"divEarlyOutGranularity":0,"mulUnroll":1,"mulEarlyOut":false,"sfmaLatency":3,"dfmaLatency":3,"divSqrt"
:true,"flushOnFenceI":true,"fastLoadByte":false,"fastLoadWord":false,"dcacheNSets":64,"dcacheNWays":4,"dcacheRowBits":32,"maxUncachedInFlight":1,"separateUncachedResp":false,"iCacheNSets":32,"iCacheNWays":4,"iCachePrefetch":false} \ No newline at end of file From c6b3c4cdd39a6210abec8780522a2154e457bbbd Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 7 Jul 2024 16:24:03 +0800 Subject: [PATCH 084/140] [nix] add rocketv-mlirbc This derivation will produced firtool emitted MLIR bytecode. Signed-off-by: Avimitin --- nix/t1/default.nix | 1 + nix/t1/rocketv-mlirbc.nix | 20 ++++++++++++++++++++ nix/t1/rocketv.nix | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 nix/t1/rocketv-mlirbc.nix diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 9d1153733..57b472b99 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -36,6 +36,7 @@ lib.makeScope newScope t1package = _millOutput.t1package; rocketv = self.callPackage ./rocketv.nix { }; + rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/rocketv-mlirbc.nix b/nix/t1/rocketv-mlirbc.nix new file mode 100644 index 000000000..0ace47603 --- /dev/null +++ b/nix/t1/rocketv-mlirbc.nix @@ -0,0 +1,20 @@ +{ stdenvNoCC + +, espresso +, circt + +, rocketv +}: +stdenvNoCC.mkDerivation { + name = "t1-rocketv-elaborated.mlirbc"; + + nativeBuildInputs = [ espresso circt ]; + + buildCommand = '' + firtool ${rocketv}/*.fir \ + --annotation-file ${rocketv}/*.anno.json \ + --emit-bytecode \ + --parse-only \ + -o $out + ''; +} diff --git a/nix/t1/rocketv.nix b/nix/t1/rocketv.nix index c7ee3cce3..197a0b518 100644 --- a/nix/t1/rocketv.nix +++ b/nix/t1/rocketv.nix @@ -76,9 +76,9 @@ let ''; installPhase = '' - mkdir -p $out/share/rocketv + mkdir -p $out - mv RocketTile.{sv,anno.json,fir} $out/share/rocketv/ + mv RocketTile.{sv,anno.json,fir} $out/ ''; }; in From 
37d041910884d665f0132a0786dad934212c2b6f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 7 Jul 2024 16:31:28 +0800 Subject: [PATCH 085/140] [nix] add rocketv-rtl target This derivation contains firtool emitted verilog. Signed-off-by: Avimitin --- nix/t1/default.nix | 1 + nix/t1/rocketv-rtl.nix | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 nix/t1/rocketv-rtl.nix diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 57b472b99..e6c38fb68 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -37,6 +37,7 @@ lib.makeScope newScope rocketv = self.callPackage ./rocketv.nix { }; rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; + rocketv-rtl = self.callPackage ./rocketv-rtl.nix { }; omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/rocketv-rtl.nix b/nix/t1/rocketv-rtl.nix new file mode 100644 index 000000000..f67521627 --- /dev/null +++ b/nix/t1/rocketv-rtl.nix @@ -0,0 +1,26 @@ +{ stdenvNoCC +, lib + +, circt +, rocketv-mlirbc +}: + +let + mfcArgs = lib.escapeShellArgs [ + "-O=debug" + "--split-verilog" + "--preserve-values=named" + "--lowering-options=verifLabels,omitVersionComment" + "--strip-debug-info" + ]; +in +stdenvNoCC.mkDerivation { + name = "t1-rocketv-rtl"; + nativeBuildInputs = [ circt ]; + + buildCommand = '' + mkdir -p $out + + firtool ${rocketv-mlirbc} ${mfcArgs} -o $out + ''; +} From 23edb71e0867fd109fb32ee0e8fea5d5b95f4a27 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 7 Jul 2024 20:16:16 +0800 Subject: [PATCH 086/140] [nix] add new derivation rocketv-verilated-csrc This derivation will produce headers and lib verilated from verilator. 
Signed-off-by: Avimitin --- nix/t1/default.nix | 3 ++ nix/t1/rocketv-verilated-csrc.nix | 78 +++++++++++++++++++++++++++++++ nix/t1/t1.nix | 2 + 3 files changed, 83 insertions(+) create mode 100644 nix/t1/rocketv-verilated-csrc.nix diff --git a/nix/t1/default.nix b/nix/t1/default.nix index e6c38fb68..0e1c5b055 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -35,9 +35,12 @@ lib.makeScope newScope configgen = _millOutput.configgen // { meta.mainProgram = "configgen"; }; t1package = _millOutput.t1package; + # FIXME: move all the rocketv file to an individual directory and put all attribute into one scope. rocketv = self.callPackage ./rocketv.nix { }; rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; rocketv-rtl = self.callPackage ./rocketv-rtl.nix { }; + rocketv-verilated-csrc = self.callPackage ./rocketv-verilated-csrc.nix { }; + rocketv-emu = self.callPackage ../../rocketemu { }; omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/rocketv-verilated-csrc.nix b/nix/t1/rocketv-verilated-csrc.nix new file mode 100644 index 000000000..eb5b4127f --- /dev/null +++ b/nix/t1/rocketv-verilated-csrc.nix @@ -0,0 +1,78 @@ +{ lib +, fetchgit +, stdenv +, rocketv-rtl +, verilator +, enable-trace ? 
true +, zlib +}: + +let + rocket-chip-v-src = fetchgit { + url = "https://github.com/chipsalliance/rocket-chip.git"; + rev = "833385404d9c722bdfad3e453c19a3ac6f40dbf0"; + fetchSubmodules = false; + sparseCheckout = [ + "src/main/resources/vsrc" + ]; + hash = "sha256-CUq9VDwb7ZtclosgOWfDZMOpH+U/yBjL5CNiXZRiB80="; + }; +in +stdenv.mkDerivation { + name = "t1-rocketv-verilated"; + + src = rocketv-rtl; + + nativeBuildInputs = [ verilator ]; + + propagatedBuildInputs = lib.optionals enable-trace [ zlib ]; + + env.rocketChipVSrc = "${rocket-chip-v-src}/src/main/resources/vsrc/"; + + buildPhase = '' + runHook preBuild + + echo "[nix] running verilator" + # FIXME: fix all the warning and remove -Wno- flag here + verilator \ + -I"$rocketChipVSrc" \ + ${lib.optionalString enable-trace "--trace-fst"} \ + --timing \ + --threads 8 \ + --threads-max-mtasks 8000 \ + -O1 \ + -Wno-WIDTHEXPAND \ + -Wno-LATCH \ + --cc TestBench + + echo "[nix] building verilated C lib" + + # backup srcs + mkdir -p $out/share + cp -r obj_dir $out/share/verilated_src + + rm $out/share/verilated_src/*.dat + + # We can't use -C here because VTestBench.mk is generated with relative path + cd obj_dir + make -j "$NIX_BUILD_CORES" -f VTestBench.mk libVTestBench + + runHook postBuild + ''; + + hardeningDisable = [ "fortify" ]; + + passthru = { + inherit enable-trace rocket-chip-v-src; + }; + + installPhase = '' + runHook preInstall + + mkdir -p $out/include $out/lib + cp *.h $out/include + cp *.a $out/lib + + runHook postInstall + ''; +} diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index 7e1d75680..0238148f1 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -29,6 +29,8 @@ let ./../../ipemu/src ./../../elaborator ./../../configgen/src + ./../../rocketv + ./../../rocketemu/src ]; }; From 46d0e9788dc3dc35b5c09c597d161a59f74f6ea7 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 8 Jul 2024 22:23:22 +0800 Subject: [PATCH 087/140] [rocketemu] implement TestBench for RocketV Emulator Signed-off-by: Avimitin --- 
build.sc | 13 ++ common.sc | 7 + elaborator/src/Main.scala | 12 ++ rocketemu/src/AXI4SlaveAgent.scala | 219 +++++++++++++++++++++++++++++ rocketemu/src/ClockGen.scala | 22 +++ rocketemu/src/DumpWave.scala | 21 +++ rocketemu/src/TestBench.scala | 90 ++++++++++++ rocketv/configs/RocketTile.json | 71 +++++++++- 8 files changed, 454 insertions(+), 1 deletion(-) create mode 100644 rocketemu/src/AXI4SlaveAgent.scala create mode 100644 rocketemu/src/ClockGen.scala create mode 100644 rocketemu/src/DumpWave.scala create mode 100644 rocketemu/src/TestBench.scala diff --git a/build.sc b/build.sc index dd587489a..3aa7d8710 100644 --- a/build.sc +++ b/build.sc @@ -149,6 +149,18 @@ trait IPEmulator def chiselIvy = None } +object rocketemu extends RocketEmulator +trait RocketEmulator extends millbuild.common.RocketEmulatorModule { + def scalaVersion = T(v.scala) + + def rocketVModule = rocketv + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + object panamaconverter extends PanamaConverter trait PanamaConverter @@ -175,6 +187,7 @@ trait Elaborator t1, ipemu, rocketv, + rocketemu, ) def mainargsIvy = v.mainargs diff --git a/common.sc b/common.sc index 82786b560..7f6e5d5c5 100644 --- a/common.sc +++ b/common.sc @@ -144,3 +144,10 @@ trait OMReaderModule super.forkArgs() ++ Seq("--enable-native-access=ALL-UNNAMED", "--enable-preview", s"-Djava.library.path=${ circtInstallPath().path / "lib"}") ) } + +trait RocketEmulatorModule + extends ScalaModule + with HasChisel { + def rocketVModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(rocketVModule) +} diff --git a/elaborator/src/Main.scala b/elaborator/src/Main.scala index 01eea2b24..17ee483a2 100644 --- a/elaborator/src/Main.scala +++ b/elaborator/src/Main.scala @@ -5,6 +5,7 @@ package org.chipsalliance.t1.elaborator import mainargs._ import org.chipsalliance.t1.rtl.T1Parameter +import 
org.chipsalliance.rocketv.RocketTileParameter import chisel3.panamalib.option._ object Main { @@ -66,7 +67,15 @@ object Main { def parameter: T1Parameter = generator.parameter } + case class RocketConfig( + @arg(name = "rocket-config", short = 'c') rocketConfig: os.Path) { + def generator = upickle.default + .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.rocketv.RocketTile, org.chipsalliance.rocketv.RocketTileParameter]](ujson.read(os.read(rocketConfig))) + def parameter: RocketTileParameter = generator.parameter + } + implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] + implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] // format: off @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => @@ -75,6 +84,9 @@ object Main { @main def ipemu(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => new org.chipsalliance.t1.ipemu.TestBench(ipConfig.generator) ) + @main def rocketemu(elaborateConfig: ElaborateConfig, rocketConfig: RocketConfig): Unit = elaborateConfig.elaborate(() => + new org.chipsalliance.t1.rocketv.TestBench(rocketConfig.generator) + ) // format: on def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) diff --git a/rocketemu/src/AXI4SlaveAgent.scala b/rocketemu/src/AXI4SlaveAgent.scala new file mode 100644 index 000000000..032d0c397 --- /dev/null +++ b/rocketemu/src/AXI4SlaveAgent.scala @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022-2024 Jiuyang Liu + +package org.chipsalliance.t1.rocketv.dpi + +// TODO: upstream to AMBA as VIP +import chisel3._ +import chisel3.util.circt.dpi.{RawClockedNonVoidFunctionCall, RawClockedVoidFunctionCall} +import chisel3.util.{scanLeftOr, OHToUInt, Reverse, Valid} +import org.chipsalliance.amba.axi4.bundle.{ + ARChannel, + ARFlowControl, + AWChannel, + AWFlowControl, + 
AXI4BundleParameter, + AXI4ROIrrevocableVerilog, + AXI4RWIrrevocableVerilog, + AXI4WOIrrevocableVerilog, + BChannel, + BFlowControl, + RChannel, + RFlowControl, + WChannel, + WFlowControl +} + +case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int) + +class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle { + val clock: Clock = Input(Clock()) + val reset: Reset = Input(Reset()) + val channelId: UInt = Input(Const(UInt(64.W))) + val channel = Flipped( + org.chipsalliance.amba.axi4.bundle.verilog.irrevocable(parameter.axiParameter) + ) +} + +class WritePayload(dataWidth: Int) extends Bundle { + val data = Vec(256, UInt(dataWidth.W)) + val strb = Vec(256, UInt((dataWidth / 8).W)) +} + +class ReadPayload(dataWidth: Int) extends Bundle { + require( + Seq(8, 16, 32, 64, 128, 256, 512, 1024).contains(dataWidth), + "A1.2.1: The data bus, which can be 8, 16, 32, 64, 128, 256, 512, or 1024 bits wide. A read response signal indicating the completion status of the read transaction." + ) + val data = Vec(256, UInt(dataWidth.W)) + val beats = UInt(8.W) +} + +// consume transaction from DPI, drive RTL signal +class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) + extends FixedIORawModule[AXI4SlaveAgentInterface](new AXI4SlaveAgentInterface(parameter)) { + dontTouch(io) + io.channel match { + case channel: AXI4RWIrrevocableVerilog => + new WriteManager(channel) + new ReadManager(channel) + case channel: AXI4ROIrrevocableVerilog => + new ReadManager(channel) + case channel: AXI4WOIrrevocableVerilog => + new WriteManager(channel) + } + + private class WriteManager( + channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { + withClockAndReset(io.clock, io.reset) { + + /** There is an valid write transaction. */ + val valid = RegInit(0.U.asTypeOf(Bool())) + + /** memory to store the write payload + * @todo limit the payload size based on the RTL configuration. 
+ */ + val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.axiParameter.dataWidth))) + + /** AWID, latch at AW fire, used at B fire. */ + val writeId = RegInit(0.U(16.W)) + + /** index the payload, used to write [[writePayload]] */ + val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) + + /** indicate W is finished, used to wake up B channel. */ + val last = RegInit(0.U.asTypeOf(Bool())) + + // AW + channel.AWREADY := !valid + when(channel.AWREADY && channel.AWVALID) { + assert(valid === false.B) + writeId := channel.AWID + valid := true.B + writeIdx := 0.U + } + + // W + channel.WREADY := true.B + when(channel.WVALID && channel.WREADY) { + writePayload.data(writeIdx) := channel.WDATA + writePayload.strb(writeIdx) := channel.WSTRB + writeIdx := writeIdx + 1.U + when(channel.WLAST) { + last := true.B + RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( + io.clock, + when.cond, + io.channelId, + channel.AWID.asTypeOf(UInt(64.W)), + channel.AWADDR.asTypeOf(UInt(64.W)), + channel.AWLEN.asTypeOf(UInt(64.W)), + channel.AWSIZE.asTypeOf(UInt(64.W)), + channel.AWBURST.asTypeOf(UInt(64.W)), + channel.AWLOCK.asTypeOf(UInt(64.W)), + channel.AWCACHE.asTypeOf(UInt(64.W)), + channel.AWPROT.asTypeOf(UInt(64.W)), + channel.AWQOS.asTypeOf(UInt(64.W)), + channel.AWREGION.asTypeOf(UInt(64.W)), + WireDefault(writePayload) + ) + } + } + + // B + channel.BVALID := last + channel.BID := writeId + channel.BRESP := 0.U(2.W) // OK + channel.BUSER := DontCare + when(channel.BVALID && channel.BREADY) { + assert(valid === true.B) + valid := false.B + last := false.B + } + } + } + + private class ReadManager(channel: ARChannel with ARFlowControl with RChannel with RFlowControl) { + withClockAndReset(io.clock, io.reset) { + class CAMValue extends Bundle { + val arid = UInt(16.W) + val readPayload = new ReadPayload(parameter.axiParameter.dataWidth) + val readPayloadIndex = UInt(8.W) + val valid = Bool() + } + + /** CAM to maintain order of read requests. 
This is maintained as FIFO. */ + val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) + + /** find first one circuit. */ + def ffo(input: UInt): UInt = ((~(scanLeftOr(input) << 1)).asUInt & input)(input.getWidth - 1, 0) + + /** find first non-valid slot in [[cam]] */ + val firstEmpty: UInt = OHToUInt(ffo(VecInit(cam.map(!_.valid)).asUInt)) + + /** there are no outstanding read requests. */ + val camIsEmpty = VecInit(cam.map(content => !content.valid)).asUInt.andR + + /** find oldest read. */ + val oldest = OHToUInt(ffo(VecInit(cam.map(content => content.valid)).asUInt)) + + /** index to select value from [[cam]] + * if cam empty, always select the next allocate value. + * if cam non-empty, update to oldest at each transaction end, this can be changed to random response with LFSR. + * @todo in the future, we can provide a fine-grand control to this index to provide out-of-order return. + */ + val rIndex = RegInit(0.U.asTypeOf(UInt(16.W))) + + // AR + channel.ARREADY := VecInit(cam.map(!_.valid)).asUInt.andR + when(channel.ARREADY && channel.ARVALID) { + cam(firstEmpty).arid := channel.ARID + cam(firstEmpty).readPayload := RawClockedNonVoidFunctionCall( + s"axi_read_${parameter.name}", + new ReadPayload(parameter.axiParameter.dataWidth) + )( + io.clock, + when.cond, + io.channelId, + channel.ARID.asTypeOf(UInt(64.W)), + channel.ARADDR.asTypeOf(UInt(64.W)), + channel.ARLEN.asTypeOf(UInt(64.W)), + channel.ARSIZE.asTypeOf(UInt(64.W)), + channel.ARBURST.asTypeOf(UInt(64.W)), + channel.ARLOCK.asTypeOf(UInt(64.W)), + channel.ARCACHE.asTypeOf(UInt(64.W)), + channel.ARPROT.asTypeOf(UInt(64.W)), + channel.ARQOS.asTypeOf(UInt(64.W)), + channel.ARREGION.asTypeOf(UInt(64.W)) + ).asInstanceOf[ReadPayload] + cam(firstEmpty).readPayloadIndex := 0.U + cam(firstEmpty).valid := true.B + } + + // R + rIndex := Mux( + camIsEmpty, + firstEmpty, // if cam empty, always select the next allocate value. 
+ Mux( + channel.RREADY && channel.RVALID && channel.RLAST, + oldest, // if cam non-empty, update to oldest at each transaction end, this can be changed to random response with LFSR. + rIndex + ) + ) + + channel.RVALID := VecInit(cam.map(_.valid)).asUInt.orR + channel.RID := cam(rIndex).arid + channel.RDATA := cam(rIndex).readPayload.data(cam(rIndex).readPayloadIndex) + channel.RRESP := 0.U // OK + channel.RLAST := cam(rIndex).readPayload.beats === cam(rIndex).readPayloadIndex + channel.RUSER := DontCare + when(channel.RREADY && channel.RVALID) { + // increase index + cam(rIndex).readPayloadIndex := cam(rIndex).readPayloadIndex + 1.U + when(channel.RLAST) { + cam(rIndex).valid := false.B + } + } + } + } +} diff --git a/rocketemu/src/ClockGen.scala b/rocketemu/src/ClockGen.scala new file mode 100644 index 000000000..c43a6eabe --- /dev/null +++ b/rocketemu/src/ClockGen.scala @@ -0,0 +1,22 @@ +package org.chipsalliance.t1.rocketv + +import chisel3.{Bool, Clock, Output} +import chisel3.experimental.ExtModule +import chisel3.probe._ +import chisel3.util.HasExtModuleInline + +class ClockGen extends ExtModule with HasExtModuleInline { + setInline(s"$desiredName.sv", + s"""module $desiredName(output reg clock, output reg reset); + | initial begin + | clock = 1'b0; + | reset = 1'b1; + | end + | initial #(11) reset = 1'b0; + | always #10 clock = ~clock; + |endmodule + |""".stripMargin + ) + val clock = IO(Output(Bool())) + val reset = IO(Output(Bool())) +} diff --git a/rocketemu/src/DumpWave.scala b/rocketemu/src/DumpWave.scala new file mode 100644 index 000000000..a2d3c3d69 --- /dev/null +++ b/rocketemu/src/DumpWave.scala @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rocketv.dpi + +import chisel3.experimental.ExtModule +import chisel3.util.HasExtModuleInline + +class DumpWave extends ExtModule with HasExtModuleInline { + setInline( + s"DumpWave.sv", + s"""module DumpWave; + |export 
"DPI-C" function DumpWave; + |function DumpWave(input string file); + |$$dumpfile(file); + |$$dumpvars(0); + |endfunction; + |endmodule + |""".stripMargin + ) +} diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala new file mode 100644 index 000000000..509605840 --- /dev/null +++ b/rocketemu/src/TestBench.scala @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.rocketv + +import chisel3._ +import chisel3.experimental.SerializableModuleGenerator +import chisel3.experimental.dataview.DataViewable +import chisel3.util.circt.dpi.{ + RawClockedNonVoidFunctionCall, + RawClockedVoidFunctionCall, + RawUnlockedNonVoidFunctionCall +} +import org.chipsalliance.amba.axi4.bundle._ +import org.chipsalliance.t1.rocketv.dpi._ +import org.chipsalliance.rocketv.{Frontend, RocketTile, RocketTileParameter} + +class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTileParameter]) extends RawModule with ImplicitClock with ImplicitReset { + val clockGen = Module(new ClockGen) + Module(new DumpWave) + + override protected def implicitClock: Clock = clockGen.clock.asClock + override protected def implicitReset: Reset = clockGen.reset + + val clock: Clock = clockGen.clock.asClock + val reset: Bool = clockGen.reset + + val dut: RocketTile = withClockAndReset(clock, reset)(Module(generator.module())) + + dut.io.clock := clockGen.clock.asClock + dut.io.reset := clockGen.reset + dut.io.hartid := 0.U + dut.io.debug := 0.U + dut.io.mtip := 0.U + dut.io.meip := 0.U + dut.io.msip := 0.U + dut.io.buserror := 0.U + + dut.io.resetVector := 10000000.U + + val simulationTime = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + simulationTime := simulationTime + 1.U + + // simulation env + withClockAndReset(clock, reset) { + // TODO: this initial way cannot happen before reset... 
+ val initFlag = RegInit(true.B) + val callInit = RawUnlockedNonVoidFunctionCall("cosim_init", Bool())(initFlag).asInstanceOf[Bool] + when(callInit) { + initFlag := false.B + printf(cf"""{"event":"simulationStart","parameter":{"cycle": ${simulationTime}}}\n""") + } + val watchdog = + RawUnlockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U).asInstanceOf[UInt] + when(watchdog =/= 0.U) { + stop(cf"""{"event":"simulationStop","parameter":{"reason": ${watchdog},"cycle": ${simulationTime}}}\n""") + } + } + + // Memory Drivers + val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] + val instFetchAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "instructionFetchAXI", + axiParameter = instFetchAXI.parameter, + outstanding = 4 + ) + ).suggestName("axi4_channel0_instructionFetchAXI") + ) + instFetchAgent.io.channel match { + case io: AXI4ROIrrevocableVerilog => io <> instFetchAXI + } + instFetchAgent.io.clock := clock + instFetchAgent.io.reset := reset + instFetchAgent.io.channelId := 0.U + + val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] + val loadStoreAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter(name = "loadStoreAXI", axiParameter = loadStoreAXI.parameter, outstanding = 4) + ).suggestName("axi4_channel1_loadStoreAXI") + ) + loadStoreAgent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> loadStoreAXI + } + loadStoreAgent.io.clock := clock + loadStoreAgent.io.reset := reset + loadStoreAgent.io.channelId := 1.U +} diff --git a/rocketv/configs/RocketTile.json b/rocketv/configs/RocketTile.json index 1ffc5bbd1..ee5fb35c1 100644 --- a/rocketv/configs/RocketTile.json +++ b/rocketv/configs/RocketTile.json @@ -1 +1,70 @@ 
-{"useAsyncReset":false,"clockGate":true,"instructionSets":["rv32_i"],"priv":"m","hartIdLen":4,"useBPWatch":false,"mcontextWidth":0,"scontextWidth":0,"asidBits":0,"resetVectorBits":32,"nBreakpoints":0,"dtlbNWays":32,"dtlbNSets":64,"itlbNSets":64,"itlbNWays":32,"itlbNSectors":4,"itlbNSuperpageEntries":4,"nPTECacheEntries":0,"nL2TLBWays":1,"nL2TLBEntries":0,"paddrBits":32,"cacheBlockBytes":32,"nPMPs":8,"legal":"b????????????????????????????????","cacheable":"b1???????????????????????????????","read":"b????????????????????????????????","write":"b????????????????????????????????","putPartial":"b????????????????????????????????","logic":"b0","arithmetic":"b0","exec":"b1???????????????????????????????","sideEffects":"b00??????????????????????????????","btbEntries":28,"btbNMatchBits":14,"btbUpdatesOutOfOrder":false,"nPages":6,"nRAS":6,"bhtParameter":[{"nEntries":512,"counterLength":1,"historyLength":8,"historyBits":3}],"mulDivLatency":2,"divUnroll":1,"divEarlyOut":false,"divEarlyOutGranularity":0,"mulUnroll":1,"mulEarlyOut":false,"sfmaLatency":3,"dfmaLatency":3,"divSqrt":true,"flushOnFenceI":true,"fastLoadByte":false,"fastLoadWord":false,"dcacheNSets":64,"dcacheNWays":4,"dcacheRowBits":32,"maxUncachedInFlight":1,"separateUncachedResp":false,"iCacheNSets":32,"iCacheNWays":4,"iCachePrefetch":false} \ No newline at end of file +{ + "parameter": { + "useAsyncReset": false, + "clockGate": true, + "instructionSets": ["rv32_i"], + "priv": "m", + "hartIdLen": 4, + "useBPWatch": false, + "mcontextWidth": 0, + "scontextWidth": 0, + "asidBits": 0, + "resetVectorBits": 32, + "nBreakpoints": 0, + "dtlbNWays": 32, + "dtlbNSets": 64, + "itlbNSets": 64, + "itlbNWays": 32, + "itlbNSectors": 4, + "itlbNSuperpageEntries": 4, + "nPTECacheEntries": 0, + "nL2TLBWays": 1, + "nL2TLBEntries": 0, + "paddrBits": 32, + "cacheBlockBytes": 32, + "nPMPs": 8, + "legal": "b????????????????????????????????", + "cacheable": "b1???????????????????????????????", + "read": "b????????????????????????????????", 
+ "write": "b????????????????????????????????", + "putPartial": "b????????????????????????????????", + "logic": "b0", + "arithmetic": "b0", + "exec": "b1???????????????????????????????", + "sideEffects": "b00??????????????????????????????", + "btbEntries": 28, + "btbNMatchBits": 14, + "btbUpdatesOutOfOrder": false, + "nPages": 6, + "nRAS": 6, + "bhtParameter": [ + { + "nEntries": 512, + "counterLength": 1, + "historyLength": 8, + "historyBits": 3 + } + ], + "mulDivLatency": 2, + "divUnroll": 1, + "divEarlyOut": false, + "divEarlyOutGranularity": 0, + "mulUnroll": 1, + "mulEarlyOut": false, + "sfmaLatency": 3, + "dfmaLatency": 3, + "divSqrt": true, + "flushOnFenceI": true, + "fastLoadByte": false, + "fastLoadWord": false, + "dcacheNSets": 64, + "dcacheNWays": 4, + "dcacheRowBits": 32, + "maxUncachedInFlight": 1, + "separateUncachedResp": false, + "iCacheNSets": 32, + "iCacheNWays": 4, + "iCachePrefetch": false + }, + "generator": "org.chipsalliance.rocketv.RocketTile" +} From f2032f05c51afa9a6a54200e7a42bf5288a3591d Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 8 Jul 2024 22:28:31 +0800 Subject: [PATCH 088/140] [nix] add rocketv into elaborator source Signed-off-by: Avimitin --- nix/t1/t1.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index 0238148f1..a0f68decf 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -63,6 +63,7 @@ let circt-full jextract-21 add-determinism + espresso makeWrapper passthru.millDeps.setupHook From b5d55a336afbd2c2a6d208f9ee81cb32aa708f6f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 8 Jul 2024 22:59:21 +0800 Subject: [PATCH 089/140] [nix] use elaborator for Rocket generate Signed-off-by: Avimitin --- nix/t1/default.nix | 1 - nix/t1/rocketv-mlirbc.nix | 10 +++-- nix/t1/rocketv.nix | 85 --------------------------------------- nix/t1/t1.nix | 2 +- 4 files changed, 7 insertions(+), 91 deletions(-) delete mode 100644 nix/t1/rocketv.nix diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 
0e1c5b055..2c6b4ade5 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -36,7 +36,6 @@ lib.makeScope newScope t1package = _millOutput.t1package; # FIXME: move all the rocketv file to an individual directory and put all attribute into one scope. - rocketv = self.callPackage ./rocketv.nix { }; rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; rocketv-rtl = self.callPackage ./rocketv-rtl.nix { }; rocketv-verilated-csrc = self.callPackage ./rocketv-verilated-csrc.nix { }; diff --git a/nix/t1/rocketv-mlirbc.nix b/nix/t1/rocketv-mlirbc.nix index 0ace47603..c01bd503a 100644 --- a/nix/t1/rocketv-mlirbc.nix +++ b/nix/t1/rocketv-mlirbc.nix @@ -3,16 +3,18 @@ , espresso , circt -, rocketv +, elaborator }: stdenvNoCC.mkDerivation { name = "t1-rocketv-elaborated.mlirbc"; - nativeBuildInputs = [ espresso circt ]; + nativeBuildInputs = [ elaborator espresso circt ]; buildCommand = '' - firtool ${rocketv}/*.fir \ - --annotation-file ${rocketv}/*.anno.json \ + mkdir elaborate + elaborator rocketemu --target-dir elaborate --rocket-config ${../../rocketv/configs/RocketTile.json} + firtool elaborate/*.fir \ + --annotation-file elaborate/*.anno.json \ --emit-bytecode \ --parse-only \ -o $out diff --git a/nix/t1/rocketv.nix b/nix/t1/rocketv.nix deleted file mode 100644 index 197a0b518..000000000 --- a/nix/t1/rocketv.nix +++ /dev/null @@ -1,85 +0,0 @@ -{ lib -, stdenv -, fetchMillDeps -, jdk21 - - # chisel deps -, mill -, espresso -, circt-full -, jextract-21 -, add-determinism - -, submodules -}: - -let - self = stdenv.mkDerivation rec { - name = "t1-rocketv"; - - src = with lib.fileset; toSource { - root = ./../..; - fileset = unions [ - ./../../build.sc - ./../../common.sc - ./../../ipemu - ./../../subsystem - ./../../emuhelper - ./../../t1 - ./../../rocket - ./../../rocketv - ./../../elaborator - ]; - }; - - passthru.millDeps = fetchMillDeps { - inherit name; - src = with lib.fileset; toSource { - root = ./../..; - fileset = unions [ - ./../../build.sc - 
./../../common.sc - ]; - }; - millDepsHash = "sha256-ZwIl6YsaGde3ikbzxLzY2+/XTc5O2dQrOMKcwhKEq+k="; - nativeBuildInputs = [ submodules.setupHook ]; - }; - - passthru.editable = self.overrideAttrs (_: { - shellHook = '' - setupSubmodulesEditable - mill mill.bsp.BSP/install 0 - ''; - }); - - shellHook = '' - setupSubmodules - ''; - - nativeBuildInputs = [ - mill - circt-full - jextract-21 - add-determinism - espresso - - passthru.millDeps.setupHook - - submodules.setupHook - ]; - - env.CIRCT_INSTALL_PATH = circt-full; - - buildPhase = '' - mill -i elaborator.runMain org.chipsalliance.t1.elaborator.rocketv.RocketTile design \ - --parameter ./rocketv/configs/RocketTile.json --run-firtool - ''; - - installPhase = '' - mkdir -p $out - - mv RocketTile.{sv,anno.json,fir} $out/ - ''; - }; -in -self diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index a0f68decf..aafed98a2 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -101,7 +101,7 @@ let mkdir -p $configgen/bin $elaborator/bin makeWrapper ${jdk21}/bin/java $configgen/bin/configgen --add-flags "-jar $out/share/java/configgen.jar" - makeWrapper ${jdk21}/bin/java $elaborator/bin/elaborator --add-flags "--enable-preview -Djava.library.path=${circt-full}/lib -jar $out/share/java/elaborator.jar" + makeWrapper ${jdk21}/bin/java $elaborator/bin/elaborator --add-flags "--enable-preview -Djava.library.path=${circt-full}/lib -cp $out/share/java/elaborator.jar org.chipsalliance.t1.elaborator.Main" ''; }; in From 8b728483fccdf2fc2a76489f04bc53ecb6d34560 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Wed, 10 Jul 2024 17:08:28 +0800 Subject: [PATCH 090/140] [rocketemu] expose AXI4 agent as C-API Signed-off-by: Avimitin --- rocketemu/default.nix | 4 +++ rocketemu/dpi/CMakeLists.txt | 48 +++++++++++++++++++++++++++++ rocketemu/dpi/default.nix | 21 +++++++++++++ rocketemu/dpi/dpi.cc | 57 +++++++++++++++++++++++++++++++++++ rocketemu/dpi/dpi.h | 40 ++++++++++++++++++++++++ rocketemu/dpi/dpi_pre_link.cc | 55 +++++++++++++++++++++++++++++++++ 
rocketemu/dpi/dpi_pre_link.h | 22 ++++++++++++++ 7 files changed, 247 insertions(+) create mode 100644 rocketemu/default.nix create mode 100644 rocketemu/dpi/CMakeLists.txt create mode 100644 rocketemu/dpi/default.nix create mode 100644 rocketemu/dpi/dpi.cc create mode 100644 rocketemu/dpi/dpi.h create mode 100644 rocketemu/dpi/dpi_pre_link.cc create mode 100644 rocketemu/dpi/dpi_pre_link.h diff --git a/rocketemu/default.nix b/rocketemu/default.nix new file mode 100644 index 000000000..e300fa341 --- /dev/null +++ b/rocketemu/default.nix @@ -0,0 +1,4 @@ +{ lib +, callPackage +}: +callPackage ./dpi { } diff --git a/rocketemu/dpi/CMakeLists.txt b/rocketemu/dpi/CMakeLists.txt new file mode 100644 index 000000000..215d7984b --- /dev/null +++ b/rocketemu/dpi/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.20) +project(rocket_dpi_c) +set(CMAKE_CXX_STANDARD 17) + +message(STATUS "Project '${PROJECT_NAME}' build type: ${CMAKE_BUILD_TYPE}") + +set(THREADS_PREFER_PTHREAD_FLAG ON) + +add_library(dpi + STATIC + dpi.cc +) + +add_library(dpi_pre_link + STATIC + dpi_pre_link.cc +) + +if (NOT DEFINED VERILATED_LIB_DIR) + set(VERILATED_LIB_DIR "$ENV{VERILATED_LIB_DIR}") + if (VERILATED_LIB_DIR STREQUAL "") + message(FATAL_ERROR "You should specify verilated libs via -DVERILATE_LIB_DIR or environment variable VERILATED_LIB_DIR, but it seems not") + endif() +endif() + +if (NOT DEFINED VERILATED_INC_DIR) + set(VERILATED_INC_DIR "$ENV{VERILATED_INC_DIR}") + if (VERILATED_INC_DIR STREQUAL "") + message(FATAL_ERROR "You should specify verilated libs via -DVERILATED_INC_DIR or environment variable VERILATED_INC_DIR, but it seems not") + endif() + message("Using VERILATED_INC_DIR ${VERILATED_INC_DIR}") +endif() + +# include verilated headers +target_include_directories(dpi PUBLIC ${VERILATED_INC_DIR}) +target_include_directories(dpi PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(dpi_pre_link PUBLIC ${VERILATED_INC_DIR}) 
+target_include_directories(dpi_pre_link PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# include verilator headers +find_package(verilator REQUIRED) +message(STATUS "Found verilator: ${verilator_DIR}") +target_include_directories(dpi PUBLIC ${verilator_DIR}/include) +target_include_directories(dpi PUBLIC ${verilator_DIR}/include/vltstd) +target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include) +target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include/vltstd) + +install(TARGETS dpi dpi_pre_link ARCHIVE) diff --git a/rocketemu/dpi/default.nix b/rocketemu/dpi/default.nix new file mode 100644 index 000000000..d29daf0a7 --- /dev/null +++ b/rocketemu/dpi/default.nix @@ -0,0 +1,21 @@ +{ lib +, verilator +, stdenv +, cmake +, rocketv-verilated-csrc +}: +stdenv.mkDerivation { + name = "rocketv-emulator"; + + src = ./.; + + nativeBuildInputs = [ + cmake + verilator + ]; + + env = { + VERILATED_INC_DIR = "${rocketv-verilated-csrc}/include"; + VERILATED_LIB_DIR = "${rocketv-verilated-csrc}/lib"; + }; +} diff --git a/rocketemu/dpi/dpi.cc b/rocketemu/dpi/dpi.cc new file mode 100644 index 000000000..7d3cff38c --- /dev/null +++ b/rocketemu/dpi/dpi.cc @@ -0,0 +1,57 @@ +// This file includes DPI call implementatitons + +#include "svdpi.h" + +#include "dpi.h" + +extern "C" { + +void *dpi_call_target; + +extern svLogic DumpWave(const char *file); + +extern void axi_read_instructionFetchAXI(long long channel_id, long long ar_id, + long long ar_addr, long long ar_len, + long long ar_size, long long ar_burst, + long long ar_lock, long long ar_cache, + long long ar_prot, long long ar_qos, + long long ar_region, + svBitVecVal *payload) { + axi_read_instructionFetchAXI_rs(dpi_call_target, channel_id, ar_id, ar_addr, + ar_len, ar_size, ar_burst, ar_lock, ar_cache, + ar_prot, ar_qos, ar_region, payload); +}; + +extern void axi_read_loadStoreAXI(void *dpi_call_target, long long channel_id, + long long ar_id, long long ar_addr, + long long ar_len, long long ar_size, + 
long long ar_burst, long long ar_lock, + long long ar_cache, long long ar_prot, + long long ar_qos, long long ar_region, + svBitVecVal *payload) { + axi_read_loadStoreAXI(dpi_call_target, channel_id, ar_id, ar_addr, ar_len, + ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, + ar_region, payload); +}; + +extern void axi_write_loadStoreAXI(long long channel_id, long long aw_id, + long long aw_addr, long long aw_len, + long long aw_size, long long aw_burst, + long long aw_lock, long long aw_cache, + long long aw_prot, long long aw_qos, + long long aw_region, + const svBitVecVal *payload) { + axi_write_loadStoreAXI_rs(dpi_call_target, channel_id, aw_id, aw_addr, aw_len, + aw_size, aw_burst, aw_lock, aw_cache, aw_prot, + aw_qos, aw_region, payload); +}; + +extern void cosim_init(svBit *call_init) { + dpi_call_target = cosim_init_rs(call_init); +}; + +extern void cosim_watchdog(char *reason) { + cosim_watchdog_rs(dpi_call_target, reason); +}; + +} // extern "C" diff --git a/rocketemu/dpi/dpi.h b/rocketemu/dpi/dpi.h new file mode 100644 index 000000000..ae2564bb8 --- /dev/null +++ b/rocketemu/dpi/dpi.h @@ -0,0 +1,40 @@ +// This file includes DPI calls to be implemented in Rust + +#pragma once + +#include "svdpi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void *dpi_call_target; + +// Parameters came from AXIAgent.scala +extern void axi_read_instructionFetchAXI_rs( + void *dpi_call_target, long long channel_id, long long ar_id, + long long ar_addr, long long ar_len, long long ar_size, long long ar_burst, + long long ar_lock, long long ar_cache, long long ar_prot, long long ar_qos, + long long ar_region, svBitVecVal *payload); + +extern void axi_read_loadStoreAXI_rs(void *dpi_call_target, + long long channel_id, long long ar_id, + long long ar_addr, long long ar_len, + long long ar_size, long long ar_burst, + long long ar_lock, long long ar_cache, + long long ar_prot, long long ar_qos, + long long ar_region, svBitVecVal *payload); + +extern void 
axi_write_loadStoreAXI_rs( + void *dpi_call_target, long long channel_id, long long aw_id, + long long aw_addr, long long aw_len, long long aw_size, long long aw_burst, + long long aw_lock, long long aw_cache, long long aw_prot, long long aw_qos, + long long aw_region, const svBitVecVal *payload); + +extern void* cosim_init_rs(svBit *call_init); + +extern void cosim_watchdog_rs(void *dpi_call_target, char *reason); + +#ifdef __cplusplus +} +#endif diff --git a/rocketemu/dpi/dpi_pre_link.cc b/rocketemu/dpi/dpi_pre_link.cc new file mode 100644 index 000000000..2845e3e99 --- /dev/null +++ b/rocketemu/dpi/dpi_pre_link.cc @@ -0,0 +1,55 @@ +#include +#include + +#include "dpi_pre_link.h" + +class VTestBench; + +VerilatedContext *contextp; +VTestBench *topp; + +int verilator_main_c(int argc, char **argv) { + // Setup context, defaults, and parse command line + Verilated::debug(0); + contextp = new VerilatedContext(); + contextp->commandArgs(argc, argv); + + // Construct the Verilated model, from Vtop.h generated from Verilating + topp = new VTestBench(contextp); + + // Simulate until $finish + while (!contextp->gotFinish()) { + // Evaluate model + topp->eval(); + // Advance time + if (!topp->eventsPending()) + break; + contextp->time(topp->nextTimeSlot()); + } + + if (!contextp->gotFinish()) { + VL_DEBUG_IF(VL_PRINTF("+ Exiting without $finish; no events left\n");); + } + + // Final model cleanup + topp->final(); + + delete topp; + delete contextp; + + return 0; +} + +void dump_wave_c(char *path) { + Verilated::traceEverOn(true); + svSetScope(svGetScopeFromName("TOP.TestBench.DumpWave")); + DumpWave(path); +} + +uint64_t get_t_c() { + if (contextp) { + return contextp->time(); + } else { // before ctx is initialized + return 0; + } +} diff --git a/rocketemu/dpi/dpi_pre_link.h b/rocketemu/dpi/dpi_pre_link.h new file mode 100644 index 000000000..a63ee7dab --- /dev/null +++ b/rocketemu/dpi/dpi_pre_link.h @@ -0,0 +1,22 @@ +// This header provides seveal functions to be used 
in Rust +// +// dpi_pre_link should be linked before libverilated.so because it +// uses symbols in libverilated.so + +#pragma once + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int verilator_main_c(int argc, char **argv); + +void dump_wave_c(char *path); + +uint64_t get_t_c(); + +#ifdef __cplusplus +} +#endif From edb9beeb74f9d302a0f6c121de150f2a6c682ecb Mon Sep 17 00:00:00 2001 From: Avimitin Date: Wed, 10 Jul 2024 18:16:30 +0800 Subject: [PATCH 091/140] [nix] add scope for rocketemu attribute Signed-off-by: Avimitin --- rocketemu/default.nix | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rocketemu/default.nix b/rocketemu/default.nix index e300fa341..1c0a81dde 100644 --- a/rocketemu/default.nix +++ b/rocketemu/default.nix @@ -1,4 +1,7 @@ { lib -, callPackage +, newScope }: -callPackage ./dpi { } +lib.makeScope newScope (scope: { + c-dpi-lib = scope.callPackage ./dpi { }; + driver = scope.callPackage ./driver { }; +}) From ffdfc9493503d9467323fec91c9ca04ce24eda32 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Wed, 10 Jul 2024 19:22:07 +0800 Subject: [PATCH 092/140] [rocketemu] implemented rocket driver Signed-off-by: Avimitin --- rocketemu/driver/Cargo.lock | 302 ++++++++++++++++++++++++++++++++++ rocketemu/driver/Cargo.toml | 11 ++ rocketemu/driver/build.rs | 20 +++ rocketemu/driver/default.nix | 33 ++++ rocketemu/driver/src/dpi.rs | 216 ++++++++++++++++++ rocketemu/driver/src/main.rs | 7 + rocketemu/driver/src/sim.rs | 236 ++++++++++++++++++++++++ rocketemu/src/TestBench.scala | 2 +- 8 files changed, 826 insertions(+), 1 deletion(-) create mode 100644 rocketemu/driver/Cargo.lock create mode 100644 rocketemu/driver/Cargo.toml create mode 100644 rocketemu/driver/build.rs create mode 100644 rocketemu/driver/default.nix create mode 100644 rocketemu/driver/src/dpi.rs create mode 100644 rocketemu/driver/src/main.rs create mode 100644 rocketemu/driver/src/sim.rs diff --git a/rocketemu/driver/Cargo.lock 
b/rocketemu/driver/Cargo.lock new file mode 100644 index 000000000..4d410cfa7 --- /dev/null +++ b/rocketemu/driver/Cargo.lock @@ -0,0 +1,302 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "clap" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name 
= "clap_builder" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "driver" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "elf", + "hex", + "tracing", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + 
+[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/rocketemu/driver/Cargo.toml b/rocketemu/driver/Cargo.toml new file mode 100644 index 000000000..46c7e1b7c --- /dev/null +++ b/rocketemu/driver/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "driver" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.4.18", features = ["derive"] } +tracing = { version = "0.1.40" } +elf = "0.7.4" +anyhow = "1.0.86" +hex = "0.4.3" diff --git a/rocketemu/driver/build.rs b/rocketemu/driver/build.rs new file mode 100644 index 000000000..7b1e05015 --- /dev/null +++ b/rocketemu/driver/build.rs @@ -0,0 +1,20 @@ +fn main() { + const SEARCH_DIRS: [&str; 2] = ["ROCKET_DPI_DIR", "TESTBENCH_LIB_DIR"]; + SEARCH_DIRS.iter().for_each(|env| { + let dir = + std::env::var(env).unwrap_or_else(|_| panic!("ERROR: {} environment variable not set", &env)); + println!("cargo:rustc-link-search=native={}/lib", &dir); + println!("cargo:rerun-if-env-changed={}", env); + }); + + // link order matters! 
+ // verilator_main <- VTestBench <-- verilated <- dpi_c <- stdc++ + // verilated <- libz + // that's why we must split verilator_main and dpi_c + println!("cargo:rustc-link-lib=static=dpi_pre_link"); + println!("cargo:rustc-link-lib=static=VTestBench"); + println!("cargo:rustc-link-lib=static=verilated"); + println!("cargo:rustc-link-lib=static=dpi"); + println!("cargo:rustc-link-lib=static=stdc++"); + println!("cargo:rustc-link-lib=dylib=z"); +} diff --git a/rocketemu/driver/default.nix b/rocketemu/driver/default.nix new file mode 100644 index 000000000..88134307a --- /dev/null +++ b/rocketemu/driver/default.nix @@ -0,0 +1,33 @@ +{ rustPlatform +, c-dpi-lib +, rocketv-verilated-csrc +, zlib +, rust-analyzer +, rustfmt +}: +let + self = rustPlatform.buildRustPackage { + name = "rocket-driver"; + + src = ./.; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + buildInputs = [ zlib ]; + + env = { + ROCKET_DPI_DIR = toString c-dpi-lib; + TESTBENCH_LIB_DIR = toString rocketv-verilated-csrc; + }; + + passthru.devShell = self.overrideAttrs (old: { + nativeBuildInputs = old.nativeBuildInputs ++ [ + rust-analyzer + rustfmt + ]; + }); + }; +in +self diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs new file mode 100644 index 000000000..79ae93d19 --- /dev/null +++ b/rocketemu/driver/src/dpi.rs @@ -0,0 +1,216 @@ +#![allow(non_snake_case)] +#![allow(unused_variables)] + +use std::ffi::{c_char, c_int, c_longlong, CString}; +use std::ptr; +use clap::Parser; +use tracing::debug; + +use crate::sim::{SimulationArgs, Simulator}; + +pub type SvScalar = u8; +pub type SvBit = SvScalar; +pub type SvBitVecVal = u32; + +// -------------------------- +// preparing data structures +// -------------------------- + +///! Read 2^aw_size from *payload, and split it at dlen/16. +///! +///! 
Return (strobe in bit, data in byte) +unsafe fn load_from_payload( + payload: &*const SvBitVecVal, + aw_size: c_longlong, + dlen: u32, +) -> (Vec, &[u8]) { + let src = *payload as *mut u8; + let strb_width_in_byte = (dlen / 8 / 8) as usize; + let payload_size_in_byte = (1 << aw_size as usize) + strb_width_in_byte; + let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); + let strobe = &byte_vec[0..strb_width_in_byte]; + let data = &byte_vec[strb_width_in_byte..]; + + let masks: Vec = strobe + .into_iter() + .flat_map(|strb| { + let mask: Vec = (0..8).map(|i| (strb & (1 << i)) != 0).collect(); + mask + }) + .collect(); + assert!( + masks.len() == data.len(), + "strobe bit width is not aligned with data byte width" + ); + + debug!( + "load {payload_size_in_byte} byte from payload: raw_data={} strb={} data={}", + hex::encode(byte_vec), + hex::encode(strobe), + hex::encode(data), + ); + + (masks, data) +} + + +fn write_to_pointer(dst: *mut u8, data: &[u8], n: usize) { + unsafe { + for i in 0..n { + ptr::write(dst.add(i), data[i]); + } + } +} + +unsafe fn fill_axi_read_payload(dst: *mut SvBitVecVal, dlen: u32, data: &[u8]) { + let data_len = (256 / 8) * dlen as usize; + assert!(data.len() <= data_len); + let dst = dst as *mut u8; + write_to_pointer(dst, data, data.len()); +} + +//---------------------- +// dpi functions +//---------------------- + +#[no_mangle] +unsafe extern "C" fn axi_write_loadStoreAXI_rs( + target: *mut (), + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_loadStore (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize=2^{awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + + let 
sim = &mut *(target as *mut Simulator); + let (strobe, data) = load_from_payload(&payload, 1 << awsize, 256); + sim.axi_write(awaddr as u32, &strobe, data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_loadStoreAXI_rs( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_highBandwidth (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let sim = &mut *(target as *mut Simulator); + let response = sim.axi_read_load_store(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, sim.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_instructionFetchAXI_rs( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_indexed (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let driver = &mut *(target as *mut Simulator); + let response = driver.axi_read_instruction(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn cosim_init_rs(call_init: *mut SvBit) -> *mut () { + let args = SimulationArgs::parse(); + *call_init = 1; + let driver = Box::new(Simulator::new(args)); + Box::into_raw(driver) as *mut () +} + 
+#[no_mangle] +unsafe extern "C" fn cosim_watchdog_rs(target: *mut (), reason: *mut c_char) { + // watchdog dpi call would be called before initialization, guard on null target + if !target.is_null() { + let sim = &mut *(target as *mut Simulator); + *reason = sim.watchdog() as c_char + } +} + +//-------------------------------- +// import functions and wrappers +//-------------------------------- + +#[link(name = "dpi_pre_link")] +extern "C" { + fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int; + + // FIXME: support waveform + //fn dump_wave_c(path: *const c_char); + + // FIXME: get cycle from simulationTime + //fn get_t_c() -> u64; +} + +/* pub(crate) fn get_t() -> u64 { + unsafe { get_t_c() / 10 } +} */ + +pub(crate) fn verilator_main() { + let mut c_args_ptr: Vec<*mut c_char> = std::env::args() + .collect::>() + .iter() + .map(|arg| CString::new(arg.as_str()).unwrap()) + .map(|arg| arg.as_ptr() as *mut c_char) + .collect(); + + c_args_ptr.push(ptr::null_mut()); + + let argc = std::env::args().len() as c_int; + + let argv = c_args_ptr.as_mut_ptr(); + + unsafe { + verilator_main_c(argc, argv); + } +} + +/* pub(crate) fn dump_wave(path: &str) { + let path_cstring = CString::new(path).unwrap(); + let path_ptr: *const c_char = path_cstring.as_ptr(); + unsafe { + dump_wave_c(path_ptr); + } +} */ diff --git a/rocketemu/driver/src/main.rs b/rocketemu/driver/src/main.rs new file mode 100644 index 000000000..a5655f059 --- /dev/null +++ b/rocketemu/driver/src/main.rs @@ -0,0 +1,7 @@ +mod dpi; +mod sim; + +fn main() { + println!("starting verilator"); + dpi::verilator_main(); +} diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs new file mode 100644 index 000000000..b2fab20cb --- /dev/null +++ b/rocketemu/driver/src/sim.rs @@ -0,0 +1,236 @@ +use clap::{arg, Parser}; +use tracing::{info, debug, trace}; +use std::collections::HashMap; +use std::os::unix::fs::FileExt; +use std::{ + fs, + path::{Path, PathBuf}, +}; + +use anyhow::Context; 
+use elf::abi::STT_FUNC; +use elf::{ + abi::{EM_RISCV, ET_EXEC, PT_LOAD}, + endian::LittleEndian, + ElfStream, +}; + +pub(crate) struct AxiReadPayload { + pub(crate) data: Vec, +} + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct SimulationArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, +} + +// FIXME: fix FunctionSym +#[derive(Debug)] +#[allow(dead_code)] +pub struct FunctionSym { + #[allow(dead_code)] + pub(crate) name: String, + #[allow(dead_code)] + pub(crate) info: u8, +} +pub type FunctionSymTab = HashMap; + +const SIM_MEM_SIZE: usize = 1usize << 32; +const RESET_VECTOR_ADDR: usize = 10_000; + +#[derive(Debug)] +pub struct Simulator { + pub(crate) mem: Vec, + #[allow(dead_code)] + pub(crate) fn_sym_tab: FunctionSymTab, + pub(crate) dlen: u32, +} + +pub static WATCHDOG_CONTINUE: u8 = 0; +pub static WATCHDOG_TIMEOUT: u8 = 1; + +impl Simulator { + pub fn new(args: SimulationArgs) -> Self { + let (mem, fn_sym_tab) = Self::load_elf(&args.elf_file).expect("fail creating simulator"); + + Self { + mem, + fn_sym_tab, + dlen: option_env!("DESIGN_DLEN") + .map(|dlen| dlen.parse().expect("fail to parse dlen into u32 digit")) + .unwrap_or(256), + } + } + + // FIXME: In current implementation, all the ELF sections are read without considering bytes order. + // We might want to take care of those information with lenntoho to convert them into host byte. + // The *elf* crate hopefully will handle this for us, but I don't do further investigation yet. 
(assign to @Avimitin) + pub fn load_elf(path: &Path) -> anyhow::Result<(Vec, FunctionSymTab)> { + let file = fs::File::open(path).with_context(|| "reading ELF file")?; + let mut elf: ElfStream = + ElfStream::open_stream(&file).with_context(|| "parsing ELF file")?; + + if elf.ehdr.e_machine != EM_RISCV { + anyhow::bail!("ELF is not in RISC-V"); + } + + if elf.ehdr.e_type != ET_EXEC { + anyhow::bail!("ELF is not an executable"); + } + + if elf.ehdr.e_phnum == 0 { + anyhow::bail!("ELF has zero size program header"); + } + + // FIXME: + // 1. If we use reduce map instead of manipulating mutable memory, does it affect + // runtime overhead? Does rustc help us optimize this operation? + // 2. The default ProgramHeader us u64 for Elf32_phdr and Elf64_phdr. + let mut mem: Vec = vec![0; SIM_MEM_SIZE]; + elf.segments().iter().filter(|phdr| phdr.p_type == PT_LOAD).for_each(|phdr| { + let vaddr: usize = phdr.p_vaddr.try_into().expect("fail converting vaddr(u64) to usize"); + let addr = RESET_VECTOR_ADDR + vaddr; + let filesz: usize = phdr.p_filesz.try_into().expect("fail converting p_filesz(u64) to usize"); + // The `offset` of the read_at method is relative to the start of the file and thus independent from the current cursor. + file.read_at(&mut mem[addr..addr + filesz], phdr.p_offset).unwrap_or_else(|err| { + panic!( + "fail reading ELF into mem with vaddr={}, filesz={}, offset={}. Error detail: {}", + vaddr, filesz, phdr.p_offset, err + ) + }); + }); + + // FIXME: now the symbol table doesn't contain any function value + let mut fn_sym_tab = FunctionSymTab::new(); + let symbol_table = + elf.symbol_table().with_context(|| "reading symbol table(SHT_SYMTAB) from ELF")?; + if let Some((parsed_table, string_table)) = symbol_table { + parsed_table + .iter() + // st_symtype = symbol.st_info & 0xf (But why masking here?) 
+ .filter(|sym| sym.st_symtype() == STT_FUNC) + .for_each(|sym| { + let name = string_table + .get(sym.st_name as usize) + .unwrap_or_else(|_| panic!("fail to get name at st_name={}", sym.st_name)); + fn_sym_tab.insert( + sym.st_value, + FunctionSym { name: name.to_string(), info: sym.st_symtype() }, + ); + }); + } else { + debug!("load_elf: symtab not found"); + }; + + Ok((mem, fn_sym_tab)) + } + + fn write_mem(&mut self, addr: u32, alignment_bytes: u32, masks: &[bool], data: &[u8]) { + // early return with strobe 0 write + if !masks.iter().any(|&x| x) { + return; + } + let size = data.len() as u32; + debug!("write mem: size={size}, addr={addr:#x}"); + + assert!( + (addr % size == 0 || addr % alignment_bytes == 0) && size >= alignment_bytes, + "unaligned write access addr={addr} size={size}bytes dlen={alignment_bytes}bytes" + ); + + masks.iter().enumerate().filter(|(_, &m)| m).for_each(|(i, _)| { + self.mem[addr as usize + i] = data[i]; + }); + } + + pub fn axi_write(&mut self, addr: u32, strobe: &[bool], data: &[u8]) { + // panic on misalign mask and data + assert_eq!( + strobe.len(), + data.len(), + "write_mem: strobe size is not equal to data size" + ); + self.write_mem(addr, self.dlen / 8, strobe, data); + } + + fn read_mem(&mut self, addr: u32, size: u32, alignment_bytes: u32) -> Vec { + assert!( + addr % size == 0 || addr % alignment_bytes == 0, + "unaligned access addr={addr} size={size}bytes dlen={alignment_bytes}bytes" + ); + let residue_addr = addr % alignment_bytes; + let aligned_addr = addr - residue_addr; + if size < alignment_bytes { + // narrow + (0..alignment_bytes) + .map(|i| { + let i_addr = aligned_addr + i; + if addr <= i_addr && i_addr < addr + size { + self.mem[i_addr as usize] + } else { + 0 + } + }) + .collect() + } else { + // normal + (0..size).map(|i| self.mem[(addr + i) as usize]).collect() + } + } + + pub fn axi_read_instruction(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + assert!(size <= 4); + let 
data = self.read_mem(addr, size, 4); + let data_hex = hex::encode(&data); + info!( + "[{}] axi_read_indexed (addr={addr:#x}, size={size}, data={data_hex})", + 0 + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + let data = self.read_mem(addr, size, self.dlen / 8); + let data_hex = hex::encode(&data); + info!( + "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + 0 + ); + AxiReadPayload { data } + } + + pub(crate) fn watchdog(&mut self) -> u8 { + trace!("watchdog continue"); + WATCHDOG_CONTINUE + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_load_elf() { + let _ = Simulator::load_elf(Path::new("./result/bin/codegen.vsseg4e8_v.elf")).unwrap(); + // TODO: verify address and bit + } + + #[test] + fn x86_should_fail() { + let err = Simulator::load_elf(Path::new("/bin/cp")).unwrap_err(); + assert_eq!(format!("{}", err), "ELF is not in RISC-V") + } +} diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index 509605840..2e92c97ea 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -36,7 +36,7 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar dut.io.msip := 0.U dut.io.buserror := 0.U - dut.io.resetVector := 10000000.U + dut.io.resetVector := 10000.U val simulationTime = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) simulationTime := simulationTime + 1.U From 724b69896d30fc61316db38ed050809d9f4b8325 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 12 Jul 2024 16:23:19 +0800 Subject: [PATCH 093/140] [rocketemu] update TestBench implementation This commit keep AXI agent implementation in sync with the axi4-t1 branch, with ClockGen and DumpWave module embedded inside the TestBench module. 
Signed-off-by: Avimitin --- rocketemu/src/AXI4SlaveAgent.scala | 180 +++++++++++++---------------- rocketemu/src/ClockGen.scala | 22 ---- rocketemu/src/DumpWave.scala | 21 ---- rocketemu/src/TestBench.scala | 88 ++++++++------ 4 files changed, 131 insertions(+), 180 deletions(-) delete mode 100644 rocketemu/src/ClockGen.scala delete mode 100644 rocketemu/src/DumpWave.scala diff --git a/rocketemu/src/AXI4SlaveAgent.scala b/rocketemu/src/AXI4SlaveAgent.scala index 032d0c397..d14fdeb91 100644 --- a/rocketemu/src/AXI4SlaveAgent.scala +++ b/rocketemu/src/AXI4SlaveAgent.scala @@ -5,48 +5,32 @@ package org.chipsalliance.t1.rocketv.dpi // TODO: upstream to AMBA as VIP import chisel3._ -import chisel3.util.circt.dpi.{RawClockedNonVoidFunctionCall, RawClockedVoidFunctionCall} -import chisel3.util.{scanLeftOr, OHToUInt, Reverse, Valid} -import org.chipsalliance.amba.axi4.bundle.{ - ARChannel, - ARFlowControl, - AWChannel, - AWFlowControl, - AXI4BundleParameter, - AXI4ROIrrevocableVerilog, - AXI4RWIrrevocableVerilog, - AXI4WOIrrevocableVerilog, - BChannel, - BFlowControl, - RChannel, - RFlowControl, - WChannel, - WFlowControl -} +import chisel3.util.circt.dpi.{RawClockedVoidFunctionCall, RawUnclockedNonVoidFunctionCall} +import chisel3.util.{OHToUInt, scanLeftOr} +import org.chipsalliance.amba.axi4.bundle.{ARChannel, ARFlowControl, AWChannel, AWFlowControl, AXI4BundleParameter, AXI4ROIrrevocableVerilog, AXI4RWIrrevocableVerilog, AXI4WOIrrevocableVerilog, BChannel, BFlowControl, RChannel, RFlowControl, WChannel, WFlowControl} -case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int) +case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int, readPayloadSize: Int, writePayloadSize: Int) class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle { val clock: Clock = Input(Clock()) val reset: Reset = Input(Reset()) - val channelId: UInt = Input(Const(UInt(64.W))) + val 
channelId: UInt = Input(Const(UInt(64.W))) + // don't issue read DPI + val gateRead: Bool = Input(Bool()) + // don't issue write DPI + val gateWrite: Bool = Input(Bool()) val channel = Flipped( org.chipsalliance.amba.axi4.bundle.verilog.irrevocable(parameter.axiParameter) ) } -class WritePayload(dataWidth: Int) extends Bundle { - val data = Vec(256, UInt(dataWidth.W)) - val strb = Vec(256, UInt((dataWidth / 8).W)) +class WritePayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) + val strb = Vec(length, UInt((dataWidth / 8).W)) } -class ReadPayload(dataWidth: Int) extends Bundle { - require( - Seq(8, 16, 32, 64, 128, 256, 512, 1024).contains(dataWidth), - "A1.2.1: The data bus, which can be 8, 16, 32, 64, 128, 256, 512, or 1024 bits wide. A read response signal indicating the completion status of the read transaction." - ) - val data = Vec(256, UInt(dataWidth.W)) - val beats = UInt(8.W) +class ReadPayload(length: Int,dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) } // consume transaction from DPI, drive RTL signal @@ -66,69 +50,87 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) private class WriteManager( channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { withClockAndReset(io.clock, io.reset) { - - /** There is an valid write transaction. */ - val valid = RegInit(0.U.asTypeOf(Bool())) + /** indicate AW is issued. */ + val awIssued = RegInit(0.U.asTypeOf(Bool())) + /** indicate W is finished, used to wake up B channel. */ + val last = RegInit(0.U.asTypeOf(Bool())) + /** indicate there is an ongoing write transaction. */ + val busy = RegInit(0.U.asTypeOf(Bool())) /** memory to store the write payload * @todo limit the payload size based on the RTL configuration. 
*/ - val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.axiParameter.dataWidth))) - + val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.writePayloadSize, parameter.axiParameter.dataWidth))) /** AWID, latch at AW fire, used at B fire. */ - val writeId = RegInit(0.U(16.W)) + val awid = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWID))) + val awaddr = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWADDR))) + val awlen = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLEN))) + val awsize = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWSIZE))) + val awburst = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWBURST))) + val awlock = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLOCK))) + val awcache = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWCACHE))) + val awprot = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWPROT))) + val awqos = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWQOS))) + val awregion = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWREGION))) /** index the payload, used to write [[writePayload]] */ val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) - /** indicate W is finished, used to wake up B channel. 
*/ - val last = RegInit(0.U.asTypeOf(Bool())) - // AW - channel.AWREADY := !valid + channel.AWREADY := !busy || (busy && !awIssued) when(channel.AWREADY && channel.AWVALID) { - assert(valid === false.B) - writeId := channel.AWID - valid := true.B - writeIdx := 0.U + awIssued := true.B + busy := true.B + awid := channel.AWID + awaddr := channel.AWADDR + awlen := channel.AWLEN + awsize := channel.AWSIZE + awburst := channel.AWBURST + awlock := channel.AWLOCK + awcache := channel.AWCACHE + awprot := channel.AWPROT + awqos := channel.AWQOS + awregion := channel.AWREGION } // W - channel.WREADY := true.B + channel.WREADY := !busy || (busy && !last) when(channel.WVALID && channel.WREADY) { + busy := true.B writePayload.data(writeIdx) := channel.WDATA writePayload.strb(writeIdx) := channel.WSTRB writeIdx := writeIdx + 1.U when(channel.WLAST) { last := true.B - RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( - io.clock, - when.cond, - io.channelId, - channel.AWID.asTypeOf(UInt(64.W)), - channel.AWADDR.asTypeOf(UInt(64.W)), - channel.AWLEN.asTypeOf(UInt(64.W)), - channel.AWSIZE.asTypeOf(UInt(64.W)), - channel.AWBURST.asTypeOf(UInt(64.W)), - channel.AWLOCK.asTypeOf(UInt(64.W)), - channel.AWCACHE.asTypeOf(UInt(64.W)), - channel.AWPROT.asTypeOf(UInt(64.W)), - channel.AWQOS.asTypeOf(UInt(64.W)), - channel.AWREGION.asTypeOf(UInt(64.W)), - WireDefault(writePayload) - ) } } // B - channel.BVALID := last - channel.BID := writeId + channel.BVALID := last && awIssued + channel.BID := awid channel.BRESP := 0.U(2.W) // OK channel.BUSER := DontCare when(channel.BVALID && channel.BREADY) { - assert(valid === true.B) - valid := false.B + RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( + io.clock, + when.cond && !io.gateWrite, + io.channelId, + // handle AW and W at same beat. 
+ Mux(channel.AWREADY && channel.AWVALID, channel.AWID, awid.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWADDR, awaddr.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWLEN, awlen.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWSIZE, awsize.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWBURST, awburst.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWLOCK, awlock.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWCACHE, awcache.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWPROT, awprot.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWQOS, awqos.asTypeOf(UInt(64.W))), + Mux(channel.AWREADY && channel.AWVALID, channel.AWREGION, awregion.asTypeOf(UInt(64.W))), + WireDefault(writePayload) + ) + awIssued := false.B last := false.B + writeIdx := 0.U } } } @@ -137,43 +139,29 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) withClockAndReset(io.clock, io.reset) { class CAMValue extends Bundle { val arid = UInt(16.W) - val readPayload = new ReadPayload(parameter.axiParameter.dataWidth) + val arlen = UInt(8.W) + val readPayload = new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth) val readPayloadIndex = UInt(8.W) val valid = Bool() } - /** CAM to maintain order of read requests. This is maintained as FIFO. */ val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) - /** find first one circuit. */ def ffo(input: UInt): UInt = ((~(scanLeftOr(input) << 1)).asUInt & input)(input.getWidth - 1, 0) - /** find first non-valid slot in [[cam]] */ val firstEmpty: UInt = OHToUInt(ffo(VecInit(cam.map(!_.valid)).asUInt)) - /** there are no outstanding read requests. */ val camIsEmpty = VecInit(cam.map(content => !content.valid)).asUInt.andR - - /** find oldest read. 
*/ + /** find oldest to index which cam to use. */ val oldest = OHToUInt(ffo(VecInit(cam.map(content => content.valid)).asUInt)) - /** index to select value from [[cam]] - * if cam empty, always select the next allocate value. - * if cam non-empty, update to oldest at each transaction end, this can be changed to random response with LFSR. - * @todo in the future, we can provide a fine-grand control to this index to provide out-of-order return. - */ - val rIndex = RegInit(0.U.asTypeOf(UInt(16.W))) - // AR - channel.ARREADY := VecInit(cam.map(!_.valid)).asUInt.andR + channel.ARREADY := VecInit(cam.map(!_.valid)).asUInt.orR when(channel.ARREADY && channel.ARVALID) { cam(firstEmpty).arid := channel.ARID - cam(firstEmpty).readPayload := RawClockedNonVoidFunctionCall( - s"axi_read_${parameter.name}", - new ReadPayload(parameter.axiParameter.dataWidth) - )( - io.clock, - when.cond, + cam(firstEmpty).arlen := channel.ARLEN + cam(firstEmpty).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( + when.cond && !io.gateRead, io.channelId, channel.ARID.asTypeOf(UInt(64.W)), channel.ARADDR.asTypeOf(UInt(64.W)), @@ -191,27 +179,17 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) } // R - rIndex := Mux( - camIsEmpty, - firstEmpty, // if cam empty, always select the next allocate value. - Mux( - channel.RREADY && channel.RVALID && channel.RLAST, - oldest, // if cam non-empty, update to oldest at each transaction end, this can be changed to random response with LFSR. 
- rIndex - ) - ) - channel.RVALID := VecInit(cam.map(_.valid)).asUInt.orR - channel.RID := cam(rIndex).arid - channel.RDATA := cam(rIndex).readPayload.data(cam(rIndex).readPayloadIndex) + channel.RID := cam(oldest).arid + channel.RDATA := cam(oldest).readPayload.data(cam(oldest).readPayloadIndex) channel.RRESP := 0.U // OK - channel.RLAST := cam(rIndex).readPayload.beats === cam(rIndex).readPayloadIndex + channel.RLAST := (cam(oldest).arlen === cam(oldest).readPayloadIndex) && cam(oldest).valid channel.RUSER := DontCare when(channel.RREADY && channel.RVALID) { // increase index - cam(rIndex).readPayloadIndex := cam(rIndex).readPayloadIndex + 1.U + cam(oldest).readPayloadIndex := cam(oldest).readPayloadIndex + 1.U when(channel.RLAST) { - cam(rIndex).valid := false.B + cam(oldest).valid := false.B } } } diff --git a/rocketemu/src/ClockGen.scala b/rocketemu/src/ClockGen.scala deleted file mode 100644 index c43a6eabe..000000000 --- a/rocketemu/src/ClockGen.scala +++ /dev/null @@ -1,22 +0,0 @@ -package org.chipsalliance.t1.rocketv - -import chisel3.{Bool, Clock, Output} -import chisel3.experimental.ExtModule -import chisel3.probe._ -import chisel3.util.HasExtModuleInline - -class ClockGen extends ExtModule with HasExtModuleInline { - setInline(s"$desiredName.sv", - s"""module $desiredName(output reg clock, output reg reset); - | initial begin - | clock = 1'b0; - | reset = 1'b1; - | end - | initial #(11) reset = 1'b0; - | always #10 clock = ~clock; - |endmodule - |""".stripMargin - ) - val clock = IO(Output(Bool())) - val reset = IO(Output(Bool())) -} diff --git a/rocketemu/src/DumpWave.scala b/rocketemu/src/DumpWave.scala deleted file mode 100644 index a2d3c3d69..000000000 --- a/rocketemu/src/DumpWave.scala +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2022 Jiuyang Liu - -package org.chipsalliance.t1.rocketv.dpi - -import chisel3.experimental.ExtModule -import chisel3.util.HasExtModuleInline - -class DumpWave extends 
ExtModule with HasExtModuleInline { - setInline( - s"DumpWave.sv", - s"""module DumpWave; - |export "DPI-C" function DumpWave; - |function DumpWave(input string file); - |$$dumpfile(file); - |$$dumpvars(0); - |endfunction; - |endmodule - |""".stripMargin - ) -} diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index 2e92c97ea..ebd68943d 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -4,29 +4,52 @@ package org.chipsalliance.t1.rocketv import chisel3._ -import chisel3.experimental.SerializableModuleGenerator +import chisel3.experimental.{ExtModule, SerializableModuleGenerator} import chisel3.experimental.dataview.DataViewable -import chisel3.util.circt.dpi.{ - RawClockedNonVoidFunctionCall, - RawClockedVoidFunctionCall, - RawUnlockedNonVoidFunctionCall -} +import chisel3.util.{log2Ceil, HasExtModuleInline, PopCount, UIntToOH, Valid} import org.chipsalliance.amba.axi4.bundle._ import org.chipsalliance.t1.rocketv.dpi._ -import org.chipsalliance.rocketv.{Frontend, RocketTile, RocketTileParameter} - -class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTileParameter]) extends RawModule with ImplicitClock with ImplicitReset { - val clockGen = Module(new ClockGen) - Module(new DumpWave) +import org.chipsalliance.rocketv.{RocketTile, RocketTileParameter} +class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTileParameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { override protected def implicitClock: Clock = clockGen.clock.asClock override protected def implicitReset: Reset = clockGen.reset - val clock: Clock = clockGen.clock.asClock - val reset: Bool = clockGen.reset + val clockGen = Module(new ExtModule with HasExtModuleInline { + override def desiredName = "ClockGen" + setInline( + s"$desiredName.sv", + s"""module $desiredName(output reg clock, output reg reset); + | export "DPI-C" function dump_wave; + | function dump_wave(input string file); + | 
$$dumpfile(file); + | $$dumpvars(0); + | endfunction; + | + | import "DPI-C" function void cosim_init(); + | initial begin + | cosim_init(); + | clock = 1'b0; + | reset = 1'b1; + | end + | initial #(11) reset = 1'b0; + | always #10 clock = ~clock; + |endmodule + |""".stripMargin + ) + val clock = IO(Output(Bool())) + val reset = IO(Output(Bool())) + }) - val dut: RocketTile = withClockAndReset(clock, reset)(Module(generator.module())) + val clock: Clock = clockGen.clock.asClock + val reset: Bool = clockGen.reset + val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + simulationTime := simulationTime + 1.U + val dut: RocketTile = withClockAndReset(clock, reset)(Module(generator.module())) dut.io.clock := clockGen.clock.asClock dut.io.reset := clockGen.reset dut.io.hartid := 0.U @@ -38,25 +61,6 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar dut.io.resetVector := 10000.U - val simulationTime = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) - simulationTime := simulationTime + 1.U - - // simulation env - withClockAndReset(clock, reset) { - // TODO: this initial way cannot happen before reset... 
- val initFlag = RegInit(true.B) - val callInit = RawUnlockedNonVoidFunctionCall("cosim_init", Bool())(initFlag).asInstanceOf[Bool] - when(callInit) { - initFlag := false.B - printf(cf"""{"event":"simulationStart","parameter":{"cycle": ${simulationTime}}}\n""") - } - val watchdog = - RawUnlockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U).asInstanceOf[UInt] - when(watchdog =/= 0.U) { - stop(cf"""{"event":"simulationStop","parameter":{"reason": ${watchdog},"cycle": ${simulationTime}}}\n""") - } - } - // Memory Drivers val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] val instFetchAgent = Module( @@ -64,7 +68,9 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar AXI4SlaveAgentParameter( name = "instructionFetchAXI", axiParameter = instFetchAXI.parameter, - outstanding = 4 + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 ) ).suggestName("axi4_channel0_instructionFetchAXI") ) @@ -74,11 +80,19 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar instFetchAgent.io.clock := clock instFetchAgent.io.reset := reset instFetchAgent.io.channelId := 0.U + instFetchAgent.io.gateRead := false.B + instFetchAgent.io.gateWrite := true.B val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] val loadStoreAgent = Module( new AXI4SlaveAgent( - AXI4SlaveAgentParameter(name = "loadStoreAXI", axiParameter = loadStoreAXI.parameter, outstanding = 4) + AXI4SlaveAgentParameter( + name = "loadStoreAXI", + axiParameter = loadStoreAXI.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) ).suggestName("axi4_channel1_loadStoreAXI") ) loadStoreAgent.io.channel match { @@ -86,5 +100,7 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar } loadStoreAgent.io.clock := clock loadStoreAgent.io.reset := reset - loadStoreAgent.io.channelId := 1.U + loadStoreAgent.io.channelId := 0.U + 
loadStoreAgent.io.gateRead := false.B + loadStoreAgent.io.gateWrite := true.B } From 04e5cfb7191aeed8b68f341bbf080f80b7b62682 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 12 Jul 2024 18:15:14 +0800 Subject: [PATCH 094/140] [rocketemu] update DPI C binding Signed-off-by: Avimitin --- rocketemu/dpi/CMakeLists.txt | 6 ++- rocketemu/dpi/dpi.cc | 78 ++++++++++++++++++----------------- rocketemu/dpi/dpi.h | 56 +++++++++++++++---------- rocketemu/dpi/dpi_pre_link.cc | 6 ++- rocketemu/dpi/dpi_pre_link.h | 2 + 5 files changed, 85 insertions(+), 63 deletions(-) diff --git a/rocketemu/dpi/CMakeLists.txt b/rocketemu/dpi/CMakeLists.txt index 215d7984b..7579134b2 100644 --- a/rocketemu/dpi/CMakeLists.txt +++ b/rocketemu/dpi/CMakeLists.txt @@ -28,7 +28,6 @@ if (NOT DEFINED VERILATED_INC_DIR) if (VERILATED_INC_DIR STREQUAL "") message(FATAL_ERROR "You should specify verilated libs via -DVERILATED_INC_DIR or environment variable VERILATED_INC_DIR, but it seems not") endif() - message("Using VERILATED_INC_DIR ${VERILATED_INC_DIR}") endif() # include verilated headers @@ -45,4 +44,9 @@ target_include_directories(dpi PUBLIC ${verilator_DIR}/include/vltstd) target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include) target_include_directories(dpi_pre_link PUBLIC ${verilator_DIR}/include/vltstd) +if(DEFINED VM_TRACE) + target_compile_definitions(dpi PRIVATE VM_TRACE=1) + target_compile_definitions(dpi_pre_link PRIVATE VM_TRACE=1) +endif() + install(TARGETS dpi dpi_pre_link ARCHIVE) diff --git a/rocketemu/dpi/dpi.cc b/rocketemu/dpi/dpi.cc index 7d3cff38c..fcf59050f 100644 --- a/rocketemu/dpi/dpi.cc +++ b/rocketemu/dpi/dpi.cc @@ -8,50 +8,52 @@ extern "C" { void *dpi_call_target; -extern svLogic DumpWave(const char *file); - -extern void axi_read_instructionFetchAXI(long long channel_id, long long ar_id, - long long ar_addr, long long ar_len, - long long ar_size, long long ar_burst, - long long ar_lock, long long ar_cache, - long long ar_prot, long long ar_qos, - long 
long ar_region, - svBitVecVal *payload) { - axi_read_instructionFetchAXI_rs(dpi_call_target, channel_id, ar_id, ar_addr, - ar_len, ar_size, ar_burst, ar_lock, ar_cache, - ar_prot, ar_qos, ar_region, payload); +/// evaluate after AW and W is finished at corresponding channel_id. +void axi_write_loadStoreAXI(long long channel_id, long long awid, + long long awaddr, long long awlen, long long awsize, + long long awburst, long long awlock, + long long awcache, long long awprot, + long long awqos, long long awregion, + /// struct packed {bit [255:0][DLEN:0] data; + /// bit [255:0][DLEN/8:0] strb; } payload + const svBitVecVal *payload) { + axi_write_loadStoreAXI_rs(dpi_call_target, channel_id, awid, awaddr, awlen, + awsize, awburst, awlock, awcache, awprot, awqos, + awregion, payload); }; -extern void axi_read_loadStoreAXI(void *dpi_call_target, long long channel_id, - long long ar_id, long long ar_addr, - long long ar_len, long long ar_size, - long long ar_burst, long long ar_lock, - long long ar_cache, long long ar_prot, - long long ar_qos, long long ar_region, - svBitVecVal *payload) { - axi_read_loadStoreAXI(dpi_call_target, channel_id, ar_id, ar_addr, ar_len, - ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, - ar_region, payload); +/// evaluate at AR fire at corresponding channel_id. 
+void axi_read_loadStoreAXI( + long long channel_id, long long arid, long long araddr, long long arlen, + long long arsize, long long arburst, long long arlock, long long arcache, + long long arprot, long long arqos, long long arregion, + /// struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + svBitVecVal *payload) { + axi_read_loadStoreAXI_rs(dpi_call_target, channel_id, arid, araddr, arlen, + arsize, arburst, arlock, arcache, arprot, arqos, + arregion, payload); }; -extern void axi_write_loadStoreAXI(long long channel_id, long long aw_id, - long long aw_addr, long long aw_len, - long long aw_size, long long aw_burst, - long long aw_lock, long long aw_cache, - long long aw_prot, long long aw_qos, - long long aw_region, - const svBitVecVal *payload) { - axi_write_loadStoreAXI_rs(dpi_call_target, channel_id, aw_id, aw_addr, aw_len, - aw_size, aw_burst, aw_lock, aw_cache, aw_prot, - aw_qos, aw_region, payload); +/// evaluate at AR fire at corresponding channel_id. +void axi_read_instructionFetchAXI( + long long channel_id, long long arid, long long araddr, long long arlen, + long long arsize, long long arburst, long long arlock, long long arcache, + long long arprot, long long arqos, long long arregion, + /// struct packed {bit [255:0][31:0] data; byte beats; } payload + svBitVecVal *payload) { + axi_read_instructionFetchAXI_rs(dpi_call_target, channel_id, arid, araddr, + arlen, arsize, arburst, arlock, arcache, + arprot, arqos, arregion, payload); }; -extern void cosim_init(svBit *call_init) { - dpi_call_target = cosim_init_rs(call_init); -}; +/// evaluate after reset, and will only be called once returning *call_init = +/// true. +void cosim_init() { dpi_call_target = cosim_init_rs(); } -extern void cosim_watchdog(char *reason) { - cosim_watchdog_rs(dpi_call_target, reason); -}; +/// evaluate at every 1024 cycles, return reason = 0 to continue simulation, +/// other value is used as error code. 
+void cosim_watchdog(char *reason) { + cosim_watchdog_rs(dpi_call_target, reason); +} } // extern "C" diff --git a/rocketemu/dpi/dpi.h b/rocketemu/dpi/dpi.h index ae2564bb8..46f6224e9 100644 --- a/rocketemu/dpi/dpi.h +++ b/rocketemu/dpi/dpi.h @@ -10,29 +10,41 @@ extern "C" { extern void *dpi_call_target; -// Parameters came from AXIAgent.scala +/// evaluate after AW and W is finished at corresponding channel_id. +extern void +axi_write_loadStoreAXI_rs(void *dpi_call_target, long long channel_id, + long long awid, long long awaddr, long long awlen, + long long awsize, long long awburst, long long awlock, + long long awcache, long long awprot, long long awqos, + long long awregion, + /// struct packed {bit [255:0][DLEN:0] data; bit + /// [255:0][DLEN/8:0] strb; } payload + const svBitVecVal *payload); + +/// evaluate at AR fire at corresponding channel_id. +extern void axi_read_loadStoreAXI_rs( + void *dpi_call_target, long long channel_id, long long arid, + long long araddr, long long arlen, long long arsize, long long arburst, + long long arlock, long long arcache, long long arprot, long long arqos, + long long arregion, + /// struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + svBitVecVal *payload); + +/// evaluate at AR fire at corresponding channel_id. 
extern void axi_read_instructionFetchAXI_rs( - void *dpi_call_target, long long channel_id, long long ar_id, - long long ar_addr, long long ar_len, long long ar_size, long long ar_burst, - long long ar_lock, long long ar_cache, long long ar_prot, long long ar_qos, - long long ar_region, svBitVecVal *payload); - -extern void axi_read_loadStoreAXI_rs(void *dpi_call_target, - long long channel_id, long long ar_id, - long long ar_addr, long long ar_len, - long long ar_size, long long ar_burst, - long long ar_lock, long long ar_cache, - long long ar_prot, long long ar_qos, - long long ar_region, svBitVecVal *payload); - -extern void axi_write_loadStoreAXI_rs( - void *dpi_call_target, long long channel_id, long long aw_id, - long long aw_addr, long long aw_len, long long aw_size, long long aw_burst, - long long aw_lock, long long aw_cache, long long aw_prot, long long aw_qos, - long long aw_region, const svBitVecVal *payload); - -extern void* cosim_init_rs(svBit *call_init); - + void *dpi_call_target, long long channel_id, long long arid, + long long araddr, long long arlen, long long arsize, long long arburst, + long long arlock, long long arcache, long long arprot, long long arqos, + long long arregion, + /// struct packed {bit [255:0][31:0] data; byte beats; } payload + svBitVecVal *payload); + +/// evaluate after reset, and will only be called once returning *call_init = +/// true. returns dpi call target +extern void *cosim_init_rs(); + +/// evaluate at every 1024 cycles, return reason = 0 to continue simulation, +/// other value is used as error code. 
extern void cosim_watchdog_rs(void *dpi_call_target, char *reason); #ifdef __cplusplus diff --git a/rocketemu/dpi/dpi_pre_link.cc b/rocketemu/dpi/dpi_pre_link.cc index 2845e3e99..17cb86349 100644 --- a/rocketemu/dpi/dpi_pre_link.cc +++ b/rocketemu/dpi/dpi_pre_link.cc @@ -40,11 +40,13 @@ int verilator_main_c(int argc, char **argv) { return 0; } +#ifdef VM_TRACE void dump_wave_c(char *path) { Verilated::traceEverOn(true); - svSetScope(svGetScopeFromName("TOP.TestBench.DumpWave")); - DumpWave(path); + svSetScope(svGetScopeFromName("TOP.TestBench.clockGen")); + dump_wave(path); } +#endif uint64_t get_t_c() { if (contextp) { diff --git a/rocketemu/dpi/dpi_pre_link.h b/rocketemu/dpi/dpi_pre_link.h index a63ee7dab..cf2752e97 100644 --- a/rocketemu/dpi/dpi_pre_link.h +++ b/rocketemu/dpi/dpi_pre_link.h @@ -13,7 +13,9 @@ extern "C" { int verilator_main_c(int argc, char **argv); +#ifdef VM_TRACE void dump_wave_c(char *path); +#endif uint64_t get_t_c(); From 2ece09e98f55dc1b3a86446359bd74b72318ab8d Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 19 Jul 2024 17:50:13 +0800 Subject: [PATCH 095/140] [nix] add riscv-tests into overlay Signed-off-by: Avimitin --- nix/overlay.nix | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/nix/overlay.nix b/nix/overlay.nix index e4e226e49..3c95e3f67 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -97,5 +97,31 @@ rec { }; }; + riscv-tests = final.pkgsCross.riscv64-embedded.stdenv.mkDerivation rec { + pname = "riscv-tests"; + version = "7878085d2546af0eb7af72a1df00996d5d8c43fb"; + src = final.fetchgit { + url = "https://github.com/riscv-software-src/riscv-tests.git"; + rev = "${version}"; + fetchSubmodules = true; + hash = "sha256-3SUfmUHwvEG4Fi6YWLLhzMhASyL07euMmkIoc9leYFE="; + }; + + enableParallelBuilding = true; + + configureFlags = [ + # to match rocket-tools path + "--prefix=${placeholder "out"}/riscv64-unknown-elf" + ]; + buildPhase = "make RISCV_PREFIX=riscv64-none-elf-"; + installPhase = '' + runHook 
preInstall + make install + mkdir -p $out/debug/ + cp debug/*.py $out/debug/ + runHook postInstall + ''; + }; + t1 = final.callPackage ./t1 { }; } From 6466725b4fb076d74c3074fcd8b24fe06b411849 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 19 Jul 2024 18:05:25 +0800 Subject: [PATCH 096/140] [doc] add document about rocket emulator Signed-off-by: Avimitin --- README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/README.md b/README.md index 78e5c6bb9..43658082b 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,43 @@ If using clion, $ nix develop .#t1..ip.emu -c clion ipemu/csrc ``` +#### Rocket emulator + +Rocket emulator contains multiple build phrase: RTL -> MLIR Bytecode -> +system verilog -> verilated C sources -> Rust emulator. + +Most of the developer doesn't need to care about MLIR, system verilog and verilate detail. +To develop the Rocket-chip RTL, run: + +```bash +# This command provide a environment that contains mill, circt, espresso... development tools. +nix develop '.#t1.elaborator' +``` + +> Metals LSP users are recommended to switch to mill-bsp mode instead of the default bloop mode. + +To elaborate the RTLs, run mill or use the nix chroot: + +```bash +# for development +mill -i elaborator.runMain org.chipsalliance.t1.elaborator.Main +# for clean build +nix build .#t1.rocketv-mlirbc +``` + +To develop the emulator, use the below nix environment: + +```bash +nix develop .#t1.rocketv-emu.driver.devShell +``` + +This will setup the verilated C src in environment, download rust-analyzer. + +```bash +cd rocketemu/driver +cargo build --release +``` + #### Developing Testcases The `tests/` contains the testcases. 
There are four types of testcases: From 749ad42b8f053bf36f92b1eb284b4f96d9869da2 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 19 Jul 2024 21:29:37 +0800 Subject: [PATCH 097/140] [rocketemu] migrate AXI from master Signed-off-by: Avimitin --- rocketemu/driver/.rustfmt.toml | 4 + rocketemu/driver/Cargo.lock | 186 +++++++++++++++++++++++++++++ rocketemu/driver/Cargo.toml | 4 + rocketemu/driver/result | 1 + rocketemu/driver/src/dpi.rs | 28 +++-- rocketemu/driver/src/main.rs | 1 - rocketemu/driver/src/sim.rs | 96 ++++++++++++--- rocketemu/src/AXI4SlaveAgent.scala | 112 +++++++++-------- rocketemu/src/TestBench.scala | 18 +-- 9 files changed, 361 insertions(+), 89 deletions(-) create mode 100644 rocketemu/driver/.rustfmt.toml create mode 120000 rocketemu/driver/result diff --git a/rocketemu/driver/.rustfmt.toml b/rocketemu/driver/.rustfmt.toml new file mode 100644 index 000000000..bf1a32fd3 --- /dev/null +++ b/rocketemu/driver/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 diff --git a/rocketemu/driver/Cargo.lock b/rocketemu/driver/Cargo.lock index 4d410cfa7..2e745aead 100644 --- a/rocketemu/driver/Cargo.lock +++ b/rocketemu/driver/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.14" @@ -57,6 +66,12 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" version = "4.5.9" @@ -112,6 +127,7 @@ dependencies = [ "elf", "hex", "tracing", + "tracing-subscriber", ] [[package]] @@ -138,12 +154,55 @@ version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -168,6 +227,65 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "strsim" version = "0.11.1" @@ -185,6 +303,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tracing" version = "0.1.40" @@ -214,6 +342,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -228,6 +386,34 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version 
= "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/rocketemu/driver/Cargo.toml b/rocketemu/driver/Cargo.toml index 46c7e1b7c..5fb6c775a 100644 --- a/rocketemu/driver/Cargo.toml +++ b/rocketemu/driver/Cargo.toml @@ -6,6 +6,10 @@ edition = "2021" [dependencies] clap = { version = "4.4.18", features = ["derive"] } tracing = { version = "0.1.40" } +tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } elf = "0.7.4" anyhow = "1.0.86" hex = "0.4.3" + +[features] +trace = [] diff --git a/rocketemu/driver/result b/rocketemu/driver/result new file mode 120000 index 000000000..e86331456 --- /dev/null +++ b/rocketemu/driver/result @@ -0,0 +1 @@ +/nix/store/vp6gwp37wwsal5wgpqydnqqchkrb102h-riscv-tests-riscv64-none-elf-7878085d2546af0eb7af72a1df00996d5d8c43fb \ No newline at end of file diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 79ae93d19..1c520da50 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -1,9 +1,9 @@ #![allow(non_snake_case)] #![allow(unused_variables)] +use clap::Parser; use std::ffi::{c_char, c_int, c_longlong, CString}; use std::ptr; -use clap::Parser; use tracing::debug; use crate::sim::{SimulationArgs, Simulator}; @@ -22,19 +22,21 @@ pub type SvBitVecVal = u32; unsafe fn load_from_payload( payload: 
&*const SvBitVecVal, aw_size: c_longlong, - dlen: u32, + data_width: u32, ) -> (Vec, &[u8]) { let src = *payload as *mut u8; - let strb_width_in_byte = (dlen / 8 / 8) as usize; - let payload_size_in_byte = (1 << aw_size as usize) + strb_width_in_byte; + let data_width_in_byte = (data_width / 8) as usize; + let strb_width_in_byte = data_width_in_byte.div_ceil(8); // ceil divide by 8 to get byte width + let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // data width in byte let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); let strobe = &byte_vec[0..strb_width_in_byte]; let data = &byte_vec[strb_width_in_byte..]; + let strb_width_in_bit = std::cmp::min(8, data_width_in_byte); let masks: Vec = strobe .into_iter() .flat_map(|strb| { - let mask: Vec = (0..8).map(|i| (strb & (1 << i)) != 0).collect(); + let mask: Vec = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); mask }) .collect(); @@ -53,7 +55,6 @@ unsafe fn load_from_payload( (masks, data) } - fn write_to_pointer(dst: *mut u8, data: &[u8], n: usize) { unsafe { for i in 0..n { @@ -96,7 +97,7 @@ unsafe extern "C" fn axi_write_loadStoreAXI_rs( ); let sim = &mut *(target as *mut Simulator); - let (strobe, data) = load_from_payload(&payload, 1 << awsize, 256); + let (strobe, data) = load_from_payload(&payload, 1 << awsize, sim.dlen); sim.axi_write(awaddr as u32, &strobe, data); } @@ -164,8 +165,8 @@ unsafe extern "C" fn cosim_init_rs(call_init: *mut SvBit) -> *mut () { unsafe extern "C" fn cosim_watchdog_rs(target: *mut (), reason: *mut c_char) { // watchdog dpi call would be called before initialization, guard on null target if !target.is_null() { - let sim = &mut *(target as *mut Simulator); - *reason = sim.watchdog() as c_char + let sim = &mut *(target as *mut Simulator); + *reason = sim.watchdog() as c_char } } @@ -180,13 +181,14 @@ extern "C" { // FIXME: support waveform //fn dump_wave_c(path: *const c_char); - // FIXME: get cycle from simulationTime - //fn 
get_t_c() -> u64; + fn get_t_c() -> u64; } -/* pub(crate) fn get_t() -> u64 { +// FIXME: currently we are using verilator context_p as simulation time. +// But we should implement read cycle at TestBench top +pub(crate) fn get_t() -> u64 { unsafe { get_t_c() / 10 } -} */ +} pub(crate) fn verilator_main() { let mut c_args_ptr: Vec<*mut c_char> = std::env::args() diff --git a/rocketemu/driver/src/main.rs b/rocketemu/driver/src/main.rs index a5655f059..579e89c6a 100644 --- a/rocketemu/driver/src/main.rs +++ b/rocketemu/driver/src/main.rs @@ -2,6 +2,5 @@ mod dpi; mod sim; fn main() { - println!("starting verilator"); dpi::verilator_main(); } diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index b2fab20cb..b9cfb33dc 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -1,11 +1,13 @@ +use crate::dpi::get_t; + use clap::{arg, Parser}; -use tracing::{info, debug, trace}; use std::collections::HashMap; use std::os::unix::fs::FileExt; use std::{ fs, path::{Path, PathBuf}, }; +use tracing::{debug, error, info, trace}; use anyhow::Context; use elf::abi::STT_FUNC; @@ -33,6 +35,10 @@ pub struct SimulationArgs { /// Log level: trace, debug, info, warn, error #[arg(long, default_value = "info")] pub log_level: String, + + /// The timeout value + #[arg(long, default_value_t = 1_0000)] + pub timeout: u64, } // FIXME: fix FunctionSym @@ -46,8 +52,8 @@ pub struct FunctionSym { } pub type FunctionSymTab = HashMap; +// NOTE: make it configurable from cmd line? 
const SIM_MEM_SIZE: usize = 1usize << 32; -const RESET_VECTOR_ADDR: usize = 10_000; #[derive(Debug)] pub struct Simulator { @@ -55,6 +61,7 @@ pub struct Simulator { #[allow(dead_code)] pub(crate) fn_sym_tab: FunctionSymTab, pub(crate) dlen: u32, + pub(crate) timeout: u64, } pub static WATCHDOG_CONTINUE: u8 = 0; @@ -62,11 +69,26 @@ pub static WATCHDOG_TIMEOUT: u8 = 1; impl Simulator { pub fn new(args: SimulationArgs) -> Self { - let (mem, fn_sym_tab) = Self::load_elf(&args.elf_file).expect("fail creating simulator"); + let log_level: tracing::Level = args.log_level.parse().expect("fail to parse LOG level"); + let global_logger = tracing_subscriber::FmtSubscriber::builder() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + + // FIXME: pass e_entry to rocket + let (_FIXME_e_entry, mem, fn_sym_tab) = + Self::load_elf(&args.elf_file).expect("fail creating simulator"); Self { mem, fn_sym_tab, + timeout: args.timeout, dlen: option_env!("DESIGN_DLEN") .map(|dlen| dlen.parse().expect("fail to parse dlen into u32 digit")) .unwrap_or(256), @@ -76,7 +98,7 @@ impl Simulator { // FIXME: In current implementation, all the ELF sections are read without considering bytes order. // We might want to take care of those information with lenntoho to convert them into host byte. // The *elf* crate hopefully will handle this for us, but I don't do further investigation yet. 
(assign to @Avimitin) - pub fn load_elf(path: &Path) -> anyhow::Result<(Vec, FunctionSymTab)> { + pub fn load_elf(path: &Path) -> anyhow::Result<(u64, Vec, FunctionSymTab)> { let file = fs::File::open(path).with_context(|| "reading ELF file")?; let mut elf: ElfStream = ElfStream::open_stream(&file).with_context(|| "parsing ELF file")?; @@ -93,17 +115,26 @@ impl Simulator { anyhow::bail!("ELF has zero size program header"); } + debug!("ELF entry: 0x{:x}", elf.ehdr.e_entry); // FIXME: - // 1. If we use reduce map instead of manipulating mutable memory, does it affect - // runtime overhead? Does rustc help us optimize this operation? - // 2. The default ProgramHeader us u64 for Elf32_phdr and Elf64_phdr. + // 1. If we use reduce map, collecting spartial memory into a whole big one, + // instead of manipulating mutable memory, does it affect runtime overhead? + // Does rustc help us optimize this operation? + // 2. The default ProgramHeader use u64 for Elf32_phdr and Elf64_phdr, can we optimize this or + // just let it go. let mut mem: Vec = vec![0; SIM_MEM_SIZE]; elf.segments().iter().filter(|phdr| phdr.p_type == PT_LOAD).for_each(|phdr| { let vaddr: usize = phdr.p_vaddr.try_into().expect("fail converting vaddr(u64) to usize"); - let addr = RESET_VECTOR_ADDR + vaddr; let filesz: usize = phdr.p_filesz.try_into().expect("fail converting p_filesz(u64) to usize"); + debug!( + "Read loadable segments 0x{:x}..0x{:x} to memory 0x{:x}", + phdr.p_offset, + phdr.p_offset + filesz as u64, + vaddr + ); + // Load file start from offset into given mem slice // The `offset` of the read_at method is relative to the start of the file and thus independent from the current cursor. - file.read_at(&mut mem[addr..addr + filesz], phdr.p_offset).unwrap_or_else(|err| { + file.read_at(&mut mem[vaddr..vaddr + filesz], phdr.p_offset).unwrap_or_else(|err| { panic!( "fail reading ELF into mem with vaddr={}, filesz={}, offset={}. 
Error detail: {}", vaddr, filesz, phdr.p_offset, err @@ -133,7 +164,7 @@ impl Simulator { debug!("load_elf: symtab not found"); }; - Ok((mem, fn_sym_tab)) + Ok((elf.ehdr.e_entry, mem, fn_sym_tab)) } fn write_mem(&mut self, addr: u32, alignment_bytes: u32, masks: &[bool], data: &[u8]) { @@ -213,19 +244,56 @@ impl Simulator { } pub(crate) fn watchdog(&mut self) -> u8 { - trace!("watchdog continue"); - WATCHDOG_CONTINUE + let tick = get_t(); + if tick > self.timeout { + error!("[{}] watchdog timeout", get_t()); + WATCHDOG_TIMEOUT + } else { + #[cfg(feature = "trace")] + if self.dump_end != 0 && tick > self.dump_end { + info!( + "[{tick}] run to dump end, exiting (last_commit_cycle={})", + self.last_commit_cycle + ); + return WATCHDOG_TIMEOUT; + } + + #[cfg(feature = "trace")] + if !self.dump_started && tick >= self.dump_start { + self.start_dump_wave(); + self.dump_started = true; + } + + trace!("[{}] watchdog continue", get_t()); + WATCHDOG_CONTINUE + } } } #[cfg(test)] mod test { use super::*; + use std::process::Command; #[test] fn test_load_elf() { - let _ = Simulator::load_elf(Path::new("./result/bin/codegen.vsseg4e8_v.elf")).unwrap(); - // TODO: verify address and bit + let output = Command::new("nix") + .args([ + "build", + "--no-warn-dirty", + "--print-out-paths", + "--no-link", + ".#riscv-tests", + ]) + .output() + .expect("fail to get riscv-test path"); + if !output.status.success() { + panic!("fail to build riscv-test"); + } + + let test_path = String::from_utf8_lossy(&output.stdout).to_string(); + + Simulator::load_elf(Path::new(&test_path)).unwrap(); } #[test] diff --git a/rocketemu/src/AXI4SlaveAgent.scala b/rocketemu/src/AXI4SlaveAgent.scala index d14fdeb91..8c5937476 100644 --- a/rocketemu/src/AXI4SlaveAgent.scala +++ b/rocketemu/src/AXI4SlaveAgent.scala @@ -6,7 +6,7 @@ package org.chipsalliance.t1.rocketv.dpi // TODO: upstream to AMBA as VIP import chisel3._ import chisel3.util.circt.dpi.{RawClockedVoidFunctionCall, RawUnclockedNonVoidFunctionCall} 
-import chisel3.util.{OHToUInt, scanLeftOr} +import chisel3.util.{isPow2, log2Ceil} import org.chipsalliance.amba.axi4.bundle.{ARChannel, ARFlowControl, AWChannel, AWFlowControl, AXI4BundleParameter, AXI4ROIrrevocableVerilog, AXI4RWIrrevocableVerilog, AXI4WOIrrevocableVerilog, BChannel, BFlowControl, RChannel, RFlowControl, WChannel, WFlowControl} case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int, readPayloadSize: Int, writePayloadSize: Int) @@ -26,7 +26,8 @@ class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle class WritePayload(length: Int, dataWidth: Int) extends Bundle { val data = Vec(length, UInt(dataWidth.W)) - val strb = Vec(length, UInt((dataWidth / 8).W)) + // For dataWidth <= 8, align strb to u8 for a simple C-API + val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) } class ReadPayload(length: Int,dataWidth: Int) extends Bundle { @@ -50,12 +51,10 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) private class WriteManager( channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { withClockAndReset(io.clock, io.reset) { - /** indicate AW is issued. */ - val awIssued = RegInit(0.U.asTypeOf(Bool())) - /** indicate W is finished, used to wake up B channel. */ - val last = RegInit(0.U.asTypeOf(Bool())) - /** indicate there is an ongoing write transaction. */ - val busy = RegInit(0.U.asTypeOf(Bool())) + /** There is an aw in the register. */ + val awIssued = RegInit(false.B) + /** There is a w in the register. */ + val last = RegInit(false.B) /** memory to store the write payload * @todo limit the payload size based on the RTL configuration. 
@@ -72,15 +71,19 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) val awprot = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWPROT))) val awqos = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWQOS))) val awregion = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWREGION))) + val awuser = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWUSER))) /** index the payload, used to write [[writePayload]] */ val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) + val bFire = channel.BREADY && channel.BVALID + val awFire = channel.AWREADY && channel.AWVALID + val wLastFire = channel.WVALID && channel.WREADY && channel.WLAST + val awExist = channel.AWVALID || awIssued + val wExist = channel.WVALID && channel.WLAST || last // AW - channel.AWREADY := !busy || (busy && !awIssued) + channel.AWREADY := !awIssued || (wExist && channel.BREADY) when(channel.AWREADY && channel.AWVALID) { - awIssued := true.B - busy := true.B awid := channel.AWID awaddr := channel.AWADDR awlen := channel.AWLEN @@ -91,46 +94,52 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) awprot := channel.AWPROT awqos := channel.AWQOS awregion := channel.AWREGION + awuser := channel.AWUSER + } + when(awFire ^ bFire) { + awIssued := awFire } // W - channel.WREADY := !busy || (busy && !last) + val writePayloadUpdate = WireDefault(writePayload) + channel.WREADY := !last || (awExist && channel.BREADY) when(channel.WVALID && channel.WREADY) { - busy := true.B writePayload.data(writeIdx) := channel.WDATA - writePayload.strb(writeIdx) := channel.WSTRB + writePayloadUpdate.data(writeIdx) := channel.WDATA + writePayload.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writePayloadUpdate.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) writeIdx := writeIdx + 1.U when(channel.WLAST) { - last := true.B + writeIdx := 0.U } } + when(wLastFire ^ bFire) { + last := wLastFire + } // B - channel.BVALID := last && awIssued - channel.BID := awid + channel.BVALID := awExist && wExist + channel.BID := 
Mux(awIssued, awid, channel.AWID) channel.BRESP := 0.U(2.W) // OK - channel.BUSER := DontCare + channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) when(channel.BVALID && channel.BREADY) { RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( io.clock, when.cond && !io.gateWrite, io.channelId, // handle AW and W at same beat. - Mux(channel.AWREADY && channel.AWVALID, channel.AWID, awid.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWADDR, awaddr.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWLEN, awlen.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWSIZE, awsize.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWBURST, awburst.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWLOCK, awlock.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWCACHE, awcache.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWPROT, awprot.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWQOS, awqos.asTypeOf(UInt(64.W))), - Mux(channel.AWREADY && channel.AWVALID, channel.AWREGION, awregion.asTypeOf(UInt(64.W))), - WireDefault(writePayload) + Mux(awIssued, awid.asTypeOf(UInt(64.W)), channel.AWID), + Mux(awIssued, awaddr.asTypeOf(UInt(64.W)), channel.AWADDR), + Mux(awIssued, awlen.asTypeOf(UInt(64.W)), channel.AWLEN), + Mux(awIssued, awsize.asTypeOf(UInt(64.W)), channel.AWSIZE), + Mux(awIssued, awburst.asTypeOf(UInt(64.W)), channel.AWBURST), + Mux(awIssued, awlock.asTypeOf(UInt(64.W)), channel.AWLOCK), + Mux(awIssued, awcache.asTypeOf(UInt(64.W)), channel.AWCACHE), + Mux(awIssued, awprot.asTypeOf(UInt(64.W)), channel.AWPROT), + Mux(awIssued, awqos.asTypeOf(UInt(64.W)), channel.AWQOS), + Mux(awIssued, awregion.asTypeOf(UInt(64.W)), channel.AWREGION), + writePayloadUpdate ) - awIssued := false.B - last := false.B - writeIdx := 0.U } } } @@ -146,21 +155,16 @@ class 
AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) } /** CAM to maintain order of read requests. This is maintained as FIFO. */ val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) - /** find first one circuit. */ - def ffo(input: UInt): UInt = ((~(scanLeftOr(input) << 1)).asUInt & input)(input.getWidth - 1, 0) - /** find first non-valid slot in [[cam]] */ - val firstEmpty: UInt = OHToUInt(ffo(VecInit(cam.map(!_.valid)).asUInt)) - /** there are no outstanding read requests. */ - val camIsEmpty = VecInit(cam.map(content => !content.valid)).asUInt.andR - /** find oldest to index which cam to use. */ - val oldest = OHToUInt(ffo(VecInit(cam.map(content => content.valid)).asUInt)) + require(isPow2(parameter.outstanding), "Need to handle pointers") + val arPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + val rPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) // AR - channel.ARREADY := VecInit(cam.map(!_.valid)).asUInt.orR + channel.ARREADY := !cam(arPtr).valid when(channel.ARREADY && channel.ARVALID) { - cam(firstEmpty).arid := channel.ARID - cam(firstEmpty).arlen := channel.ARLEN - cam(firstEmpty).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( + cam(arPtr).arid := channel.ARID + cam(arPtr).arlen := channel.ARLEN + cam(arPtr).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( when.cond && !io.gateRead, io.channelId, channel.ARID.asTypeOf(UInt(64.W)), @@ -173,23 +177,25 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) channel.ARPROT.asTypeOf(UInt(64.W)), channel.ARQOS.asTypeOf(UInt(64.W)), channel.ARREGION.asTypeOf(UInt(64.W)) - ).asInstanceOf[ReadPayload] - cam(firstEmpty).readPayloadIndex := 0.U - cam(firstEmpty).valid := true.B + ) + cam(arPtr).readPayloadIndex := 0.U + 
cam(arPtr).valid := true.B + arPtr := arPtr + 1.U } // R - channel.RVALID := VecInit(cam.map(_.valid)).asUInt.orR - channel.RID := cam(oldest).arid - channel.RDATA := cam(oldest).readPayload.data(cam(oldest).readPayloadIndex) + channel.RVALID := cam(rPtr).valid + channel.RID := cam(rPtr).arid + channel.RDATA := cam(rPtr).readPayload.data(cam(rPtr).readPayloadIndex) channel.RRESP := 0.U // OK - channel.RLAST := (cam(oldest).arlen === cam(oldest).readPayloadIndex) && cam(oldest).valid + channel.RLAST := (cam(rPtr).arlen === cam(rPtr).readPayloadIndex) && cam(rPtr).valid channel.RUSER := DontCare when(channel.RREADY && channel.RVALID) { // increase index - cam(oldest).readPayloadIndex := cam(oldest).readPayloadIndex + 1.U + cam(rPtr).readPayloadIndex := cam(rPtr).readPayloadIndex + 1.U when(channel.RLAST) { - cam(oldest).valid := false.B + cam(rPtr).valid := false.B + rPtr := rPtr + 1.U } } } diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index ebd68943d..f76cc2890 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -15,9 +15,6 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar extends RawModule with ImplicitClock with ImplicitReset { - override protected def implicitClock: Clock = clockGen.clock.asClock - override protected def implicitReset: Reset = clockGen.reset - val clockGen = Module(new ExtModule with HasExtModuleInline { override def desiredName = "ClockGen" setInline( @@ -44,8 +41,12 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar val reset = IO(Output(Bool())) }) - val clock: Clock = clockGen.clock.asClock - val reset: Bool = clockGen.reset + val clock: Clock = clockGen.clock.asClock + val reset: Bool = clockGen.reset + + override protected def implicitClock: Clock = clockGen.clock.asClock + override protected def implicitReset: Reset = clockGen.reset + val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) 
simulationTime := simulationTime + 1.U @@ -59,7 +60,8 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar dut.io.msip := 0.U dut.io.buserror := 0.U - dut.io.resetVector := 10000.U + // FIXME: get resetVector from simulator instead of hard code here + dut.io.resetVector := (BigInt(1) << 31).U // Memory Drivers val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] @@ -81,7 +83,7 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar instFetchAgent.io.reset := reset instFetchAgent.io.channelId := 0.U instFetchAgent.io.gateRead := false.B - instFetchAgent.io.gateWrite := true.B + instFetchAgent.io.gateWrite := false.B val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] val loadStoreAgent = Module( @@ -102,5 +104,5 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar loadStoreAgent.io.reset := reset loadStoreAgent.io.channelId := 0.U loadStoreAgent.io.gateRead := false.B - loadStoreAgent.io.gateWrite := true.B + loadStoreAgent.io.gateWrite := false.B } From 62f9bda402f0299c79f8ff6b6edfd0d748cbdf3c Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 19 Jul 2024 22:01:23 +0800 Subject: [PATCH 098/140] [rocketemu] implement waveform generate Signed-off-by: Avimitin --- rocketemu/dpi/default.nix | 6 +++ rocketemu/dpi/dpi_pre_link.cc | 1 + rocketemu/driver/src/dpi.rs | 9 ++-- rocketemu/driver/src/sim.rs | 77 ++++++++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 9 deletions(-) diff --git a/rocketemu/dpi/default.nix b/rocketemu/dpi/default.nix index d29daf0a7..f8714b4b6 100644 --- a/rocketemu/dpi/default.nix +++ b/rocketemu/dpi/default.nix @@ -2,6 +2,7 @@ , verilator , stdenv , cmake +, ninja , rocketv-verilated-csrc }: stdenv.mkDerivation { @@ -11,9 +12,14 @@ stdenv.mkDerivation { nativeBuildInputs = [ cmake + ninja verilator ]; + cmakeFlags = lib.optionals rocketv-verilated-csrc.enable-trace [ + "-DVM_TRACE=ON" + ]; + env = 
{ VERILATED_INC_DIR = "${rocketv-verilated-csrc}/include"; VERILATED_LIB_DIR = "${rocketv-verilated-csrc}/lib"; diff --git a/rocketemu/dpi/dpi_pre_link.cc b/rocketemu/dpi/dpi_pre_link.cc index 17cb86349..be7293350 100644 --- a/rocketemu/dpi/dpi_pre_link.cc +++ b/rocketemu/dpi/dpi_pre_link.cc @@ -12,6 +12,7 @@ int verilator_main_c(int argc, char **argv) { // Setup context, defaults, and parse command line Verilated::debug(0); contextp = new VerilatedContext(); + contextp->fatalOnError(false); contextp->commandArgs(argc, argv); // Construct the Verilated model, from Vtop.h generated from Verilating diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 1c520da50..1a36c4d18 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -178,8 +178,8 @@ unsafe extern "C" fn cosim_watchdog_rs(target: *mut (), reason: *mut c_char) { extern "C" { fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int; - // FIXME: support waveform - //fn dump_wave_c(path: *const c_char); + #[cfg(feature = "trace")] + fn dump_wave_c(path: *const c_char); fn get_t_c() -> u64; } @@ -209,10 +209,11 @@ pub(crate) fn verilator_main() { } } -/* pub(crate) fn dump_wave(path: &str) { +#[cfg(feature = "trace")] +pub(crate) fn dump_wave(path: &str) { let path_cstring = CString::new(path).unwrap(); let path_ptr: *const c_char = path_cstring.as_ptr(); unsafe { dump_wave_c(path_ptr); } -} */ +} diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index b9cfb33dc..72c5293d7 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -1,4 +1,4 @@ -use crate::dpi::get_t; +use crate::dpi::{dump_wave, get_t}; use clap::{arg, Parser}; use std::collections::HashMap; @@ -39,6 +39,50 @@ pub struct SimulationArgs { /// The timeout value #[arg(long, default_value_t = 1_0000)] pub timeout: u64, + + #[cfg(feature = "trace")] + #[arg(long)] + pub wave_path: String, + + #[cfg(feature = "trace")] + #[arg(long, default_value = "")] + pub 
dump_range: String, +} + +impl SimulationArgs { + #[cfg(feature = "trace")] + fn parse_range(&self) -> (u64, u64) { + let input = &self.dump_range; + + if input.is_empty() { + return (0, 0); + } + + let parts: Vec<&str> = input.split(",").collect(); + + if parts.len() != 1 && parts.len() != 2 { + error!("invalid dump wave range: `{input}` was given"); + return (0, 0); + } + + const INVALID_NUMBER: &'static str = "invalid number"; + + if parts.len() == 1 { + return (parts[0].parse().expect(INVALID_NUMBER), 0); + } + + if parts[0].is_empty() { + return (0, parts[1].parse().expect(INVALID_NUMBER)); + } + + let start = parts[0].parse().expect(INVALID_NUMBER); + let end = parts[1].parse().expect(INVALID_NUMBER); + if start > end { + panic!("dump start is larger than end: `{input}`"); + } + + (start, end) + } } // FIXME: fix FunctionSym @@ -62,6 +106,15 @@ pub struct Simulator { pub(crate) fn_sym_tab: FunctionSymTab, pub(crate) dlen: u32, pub(crate) timeout: u64, + + #[cfg(feature = "trace")] + wave_path: String, + #[cfg(feature = "trace")] + dump_start: u64, + #[cfg(feature = "trace")] + dump_end: u64, + #[cfg(feature = "trace")] + dump_started: bool, } pub static WATCHDOG_CONTINUE: u8 = 0; @@ -85,6 +138,9 @@ impl Simulator { let (_FIXME_e_entry, mem, fn_sym_tab) = Self::load_elf(&args.elf_file).expect("fail creating simulator"); + #[cfg(feature = "trace")] + let (dump_start, dump_end) = args.parse_range(); + Self { mem, fn_sym_tab, @@ -92,6 +148,15 @@ impl Simulator { dlen: option_env!("DESIGN_DLEN") .map(|dlen| dlen.parse().expect("fail to parse dlen into u32 digit")) .unwrap_or(256), + + #[cfg(feature = "trace")] + wave_path: args.wave_path.to_owned(), + #[cfg(feature = "trace")] + dump_start, + #[cfg(feature = "trace")] + dump_end, + #[cfg(feature = "trace")] + dump_started: false, } } @@ -251,10 +316,7 @@ impl Simulator { } else { #[cfg(feature = "trace")] if self.dump_end != 0 && tick > self.dump_end { - info!( - "[{tick}] run to dump end, exiting 
(last_commit_cycle={})", - self.last_commit_cycle - ); + info!("[{tick}] run to dump end, exiting",); return WATCHDOG_TIMEOUT; } @@ -268,6 +330,11 @@ impl Simulator { WATCHDOG_CONTINUE } } + + #[cfg(feature = "trace")] + fn start_dump_wave(&mut self) { + dump_wave(&self.wave_path); + } } #[cfg(test)] From d66404be86a3b0090383ab3be28286c54d2d82e9 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 22 Jul 2024 17:05:33 +0800 Subject: [PATCH 099/140] [rocketemu] add missing watchdog call --- rocketemu/src/TestBench.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index f76cc2890..7970f6e34 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -7,6 +7,7 @@ import chisel3._ import chisel3.experimental.{ExtModule, SerializableModuleGenerator} import chisel3.experimental.dataview.DataViewable import chisel3.util.{log2Ceil, HasExtModuleInline, PopCount, UIntToOH, Valid} +import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall import org.chipsalliance.amba.axi4.bundle._ import org.chipsalliance.t1.rocketv.dpi._ import org.chipsalliance.rocketv.{RocketTile, RocketTileParameter} @@ -50,6 +51,13 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) simulationTime := simulationTime + 1.U + withClockAndReset(clock, reset) { + val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } + } + val dut: RocketTile = withClockAndReset(clock, reset)(Module(generator.module())) dut.io.clock := clockGen.clock.asClock dut.io.reset := clockGen.reset From 50b45a8a5c15accaf9990b2898dc1d61abc53a51 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Mon, 22 Jul 2024 18:00:07 +0800 Subject: [PATCH 100/140] 
[rocketemu] Requires a longer reset. --- rocketemu/src/TestBench.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index 7970f6e34..cdf617f7f 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -33,7 +33,7 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar | clock = 1'b0; | reset = 1'b1; | end - | initial #(11) reset = 1'b0; + | initial #(101) reset = 1'b0; | always #10 clock = ~clock; |endmodule |""".stripMargin From 5ad2963c4720b0a9352afd1eda992934d4b965e8 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 22 Jul 2024 18:21:48 +0800 Subject: [PATCH 101/140] [rocketemu] fix wrong size --- rocketemu/driver/src/sim.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index 72c5293d7..ed2e0a137 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -286,8 +286,7 @@ impl Simulator { } pub fn axi_read_instruction(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { - let size = 1 << arsize; - assert!(size <= 4); + let size = 1 << arsize; // size in bytes let data = self.read_mem(addr, size, 4); let data_hex = hex::encode(&data); info!( @@ -298,7 +297,7 @@ impl Simulator { } pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { - let size = 1 << arsize; + let size = 1 << arsize; // size in bytes let data = self.read_mem(addr, size, self.dlen / 8); let data_hex = hex::encode(&data); info!( From ad8ce2e3f34b8a5626208ac8dde349ad8f77fc2a Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 22 Jul 2024 20:22:12 +0800 Subject: [PATCH 102/140] [rocketemu] refactor AXI read functions and remove the read alignment --- rocketemu/driver/src/dpi.rs | 18 ++++++++-------- rocketemu/driver/src/sim.rs | 42 +++++++------------------------------ 2 files changed, 16 insertions(+), 44 deletions(-) diff --git 
a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 1a36c4d18..8731ee1f2 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -91,7 +91,7 @@ unsafe extern "C" fn axi_write_loadStoreAXI_rs( payload: *const SvBitVecVal, ) { debug!( - "axi_write_loadStore (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + "axi_write_loadStoreAXI (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ awlen={awlen}, awsize=2^{awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ awprot={awprot}, awqos={awqos}, awregion={awregion})" ); @@ -118,12 +118,12 @@ unsafe extern "C" fn axi_read_loadStoreAXI_rs( payload: *mut SvBitVecVal, ) { debug!( - "axi_read_highBandwidth (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + "axi_read_loadStoreAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ arprot={arprot}, arqos={arqos}, arregion={arregion})" ); let sim = &mut *(target as *mut Simulator); - let response = sim.axi_read_load_store(araddr as u32, arsize as u64); + let response = sim.axi_read(araddr as u32, arsize as u64); fill_axi_read_payload(payload, sim.dlen, &response.data); } @@ -144,21 +144,21 @@ unsafe extern "C" fn axi_read_instructionFetchAXI_rs( payload: *mut SvBitVecVal, ) { debug!( - "axi_read_indexed (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + "axi_read_instructionFetchAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ arprot={arprot}, arqos={arqos}, arregion={arregion})" ); - let driver = &mut *(target as *mut Simulator); - let response = driver.axi_read_instruction(araddr as u32, arsize as u64); - fill_axi_read_payload(payload, driver.dlen, &response.data); + let sim = &mut *(target as *mut Simulator); + let response = sim.axi_read(araddr as u32, arsize as u64); + 
fill_axi_read_payload(payload, sim.dlen, &response.data); } #[no_mangle] unsafe extern "C" fn cosim_init_rs(call_init: *mut SvBit) -> *mut () { let args = SimulationArgs::parse(); *call_init = 1; - let driver = Box::new(Simulator::new(args)); - Box::into_raw(driver) as *mut () + let sim = Box::new(Simulator::new(args)); + Box::into_raw(sim) as *mut () } #[no_mangle] diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index ed2e0a137..92ee6c3d4 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -260,48 +260,20 @@ impl Simulator { self.write_mem(addr, self.dlen / 8, strobe, data); } - fn read_mem(&mut self, addr: u32, size: u32, alignment_bytes: u32) -> Vec { + fn read_mem(&mut self, addr: u32, size: u32) -> Vec { assert!( - addr % size == 0 || addr % alignment_bytes == 0, - "unaligned access addr={addr} size={size}bytes dlen={alignment_bytes}bytes" + addr % size == 0, + "unaligned access addr={addr} size={size}bytes" ); - let residue_addr = addr % alignment_bytes; - let aligned_addr = addr - residue_addr; - if size < alignment_bytes { - // narrow - (0..alignment_bytes) - .map(|i| { - let i_addr = aligned_addr + i; - if addr <= i_addr && i_addr < addr + size { - self.mem[i_addr as usize] - } else { - 0 - } - }) - .collect() - } else { - // normal - (0..size).map(|i| self.mem[(addr + i) as usize]).collect() - } - } - - pub fn axi_read_instruction(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { - let size = 1 << arsize; // size in bytes - let data = self.read_mem(addr, size, 4); - let data_hex = hex::encode(&data); - info!( - "[{}] axi_read_indexed (addr={addr:#x}, size={size}, data={data_hex})", - 0 - ); - AxiReadPayload { data } + (0..size).map(|i| self.mem[(addr + i) as usize]).collect() } - pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + pub fn axi_read(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { let size = 1 << arsize; // size in bytes - let data = 
self.read_mem(addr, size, self.dlen / 8); + let data = self.read_mem(addr, size); let data_hex = hex::encode(&data); info!( - "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + "[{}] axi_read (addr={addr:#x}, size={size}, data={data_hex})", 0 ); AxiReadPayload { data } From 46e00e7a446dbcd7e3498bd243a1c27ecd1f2c43 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Tue, 23 Jul 2024 12:55:40 +0800 Subject: [PATCH 103/140] [rocketemu] add axi read burst in TestBench --- ipemu/src/AXI4SlaveAgent.scala | 4 +++- rocketemu/driver/src/sim.rs | 16 ++++++++++++---- rocketemu/src/TestBench.scala | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ipemu/src/AXI4SlaveAgent.scala b/ipemu/src/AXI4SlaveAgent.scala index 23b814cde..963b04ebf 100644 --- a/ipemu/src/AXI4SlaveAgent.scala +++ b/ipemu/src/AXI4SlaveAgent.scala @@ -30,6 +30,7 @@ class WritePayload(length: Int, dataWidth: Int) extends Bundle { val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) } +// TODO: consider adding the latency of the read transaction class ReadPayload(length: Int,dataWidth: Int) extends Bundle { val data = Vec(length, UInt(dataWidth.W)) } @@ -121,7 +122,8 @@ class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) channel.BVALID := awExist && wExist channel.BID := Mux(awIssued, awid, channel.AWID) channel.BRESP := 0.U(2.W) // OK - channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) + channel.BUSER := DontCare + // TODO: add latency to the write transaction reply when(channel.BVALID && channel.BREADY) { RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( io.clock, diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index 92ee6c3d4..9076dfa2b 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -238,8 +238,7 @@ impl Simulator { return; } let size = data.len() as u32; - debug!("write mem: size={size}, addr={addr:#x}"); - + // debug!("[{}] write_mem: size={size}, addr={addr:#x}", get_t()); assert!( 
(addr % size == 0 || addr % alignment_bytes == 0) && size >= alignment_bytes, "unaligned write access addr={addr} size={size}bytes dlen={alignment_bytes}bytes" @@ -255,8 +254,15 @@ impl Simulator { assert_eq!( strobe.len(), data.len(), - "write_mem: strobe size is not equal to data size" + "[{}] axi_write: strobe size is not equal to data size", + get_t() + ); + let data_hex = hex::encode(data); + info!( + "[{}] axi_write (addr={addr:#x}, data={data_hex})", + get_t() ); + self.write_mem(addr, self.dlen / 8, strobe, data); } @@ -265,6 +271,8 @@ impl Simulator { addr % size == 0, "unaligned access addr={addr} size={size}bytes" ); + // debug!("[{}] read_mem: size={size}, addr={addr:#x}", get_t()); + (0..size).map(|i| self.mem[(addr + i) as usize]).collect() } @@ -274,7 +282,7 @@ impl Simulator { let data_hex = hex::encode(&data); info!( "[{}] axi_read (addr={addr:#x}, size={size}, data={data_hex})", - 0 + get_t() ); AxiReadPayload { data } } diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index cdf617f7f..c139bf4d8 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -100,8 +100,8 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar name = "loadStoreAXI", axiParameter = loadStoreAXI.parameter, outstanding = 4, - readPayloadSize = 1, - writePayloadSize = 1 + readPayloadSize = 8, // todo: align with parameter in the future + writePayloadSize = 8 ) ).suggestName("axi4_channel1_loadStoreAXI") ) From f46f033cf38fb5643e76bff22fa47ab46ee3f5a9 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 23 Jul 2024 14:41:46 +0800 Subject: [PATCH 104/140] [nix] use riscv32 embedded toolchain to compile riscv-tests --- nix/overlay.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/overlay.nix b/nix/overlay.nix index 3c95e3f67..655d4904c 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -97,7 +97,7 @@ rec { }; }; - riscv-tests = 
final.pkgsCross.riscv64-embedded.stdenv.mkDerivation rec { + riscv-tests = final.pkgsCross.riscv32-embedded.stdenv.mkDerivation rec { pname = "riscv-tests"; version = "7878085d2546af0eb7af72a1df00996d5d8c43fb"; src = final.fetchgit { @@ -111,9 +111,9 @@ rec { configureFlags = [ # to match rocket-tools path - "--prefix=${placeholder "out"}/riscv64-unknown-elf" + "--prefix=${placeholder "out"}/riscv32-unknown-elf" ]; - buildPhase = "make RISCV_PREFIX=riscv64-none-elf-"; + buildPhase = "make RISCV_PREFIX=riscv32-none-elf-"; installPhase = '' runHook preInstall make install From 15b10f1cc03228623224773fdcef69fbdd16872f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 23 Jul 2024 14:42:38 +0800 Subject: [PATCH 105/140] [rocketemu] fix trace build for driver --- rocketemu/driver/src/sim.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index 9076dfa2b..cea85af2c 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -1,4 +1,6 @@ -use crate::dpi::{dump_wave, get_t}; +#[cfg(feature = "trace")] +use crate::dpi::dump_wave; +use crate::dpi::get_t; use clap::{arg, Parser}; use std::collections::HashMap; From a40adf21c516acb9fcb256772b7017cc2c33e07c Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Tue, 23 Jul 2024 13:44:19 +0800 Subject: [PATCH 106/140] [rocketemu] cycle should equals to tick / 20 --- rocketemu/driver/src/dpi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 8731ee1f2..65633f63d 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -187,7 +187,7 @@ extern "C" { // FIXME: currently we are using verilator context_p as simulation time. 
// But we should implement read cycle at TestBench top pub(crate) fn get_t() -> u64 { - unsafe { get_t_c() / 10 } + unsafe { get_t_c() / 20 } } pub(crate) fn verilator_main() { From 1c9dc3f3468c0973235a9bbdf9b3d2b9f925bdf4 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Tue, 23 Jul 2024 16:12:22 +0800 Subject: [PATCH 107/140] [rocketemu] add riscv test env [rocketemu] fix wrong exit macro [rocketemu] fix wrong sw format [nix] use t1-env for riscv-tests [tests] fix MMIO write when exit [rocketemu] remove ecall --- nix/overlay.nix | 13 +- tests/riscv-test-env/LICENSE | 24 + tests/riscv-test-env/encoding.h | 5013 ++++++++++++++++++++++++++ tests/riscv-test-env/p/link.ld | 17 + tests/riscv-test-env/p/riscv_test.h | 282 ++ tests/riscv-test-env/pm/link.ld | 1 + tests/riscv-test-env/pm/riscv_test.h | 11 + tests/riscv-test-env/pt/link.ld | 1 + tests/riscv-test-env/pt/riscv_test.h | 69 + tests/riscv-test-env/v/entry.S | 164 + tests/riscv-test-env/v/link.ld | 1 + tests/riscv-test-env/v/riscv_test.h | 94 + tests/riscv-test-env/v/string.c | 114 + tests/riscv-test-env/v/vm.c | 315 ++ 14 files changed, 6115 insertions(+), 4 deletions(-) create mode 100644 tests/riscv-test-env/LICENSE create mode 100644 tests/riscv-test-env/encoding.h create mode 100644 tests/riscv-test-env/p/link.ld create mode 100644 tests/riscv-test-env/p/riscv_test.h create mode 120000 tests/riscv-test-env/pm/link.ld create mode 100644 tests/riscv-test-env/pm/riscv_test.h create mode 120000 tests/riscv-test-env/pt/link.ld create mode 100644 tests/riscv-test-env/pt/riscv_test.h create mode 100644 tests/riscv-test-env/v/entry.S create mode 120000 tests/riscv-test-env/v/link.ld create mode 100644 tests/riscv-test-env/v/riscv_test.h create mode 100644 tests/riscv-test-env/v/string.c create mode 100644 tests/riscv-test-env/v/vm.c diff --git a/nix/overlay.nix b/nix/overlay.nix index 655d4904c..bd590d879 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -100,13 +100,18 @@ rec { riscv-tests = 
final.pkgsCross.riscv32-embedded.stdenv.mkDerivation rec { pname = "riscv-tests"; version = "7878085d2546af0eb7af72a1df00996d5d8c43fb"; - src = final.fetchgit { - url = "https://github.com/riscv-software-src/riscv-tests.git"; + src = final.fetchFromGitHub { + owner = "riscv-software-src"; + repo = "riscv-tests"; rev = "${version}"; - fetchSubmodules = true; - hash = "sha256-3SUfmUHwvEG4Fi6YWLLhzMhASyL07euMmkIoc9leYFE="; + hash = "sha256-CruSrXVO5Qlk63HPBVbwzl/RdxAAl2bknWawDHJwEKY="; }; + postUnpack = '' + rm -rf $sourceRoot/env + cp -r ${../tests/riscv-test-env} $sourceRoot/env + ''; + enableParallelBuilding = true; configureFlags = [ diff --git a/tests/riscv-test-env/LICENSE b/tests/riscv-test-env/LICENSE new file mode 100644 index 000000000..48fe522ac --- /dev/null +++ b/tests/riscv-test-env/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2012-2015, The Regents of the University of California (Regents). +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. Neither the name of the Regents nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING +OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS +BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED +HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/tests/riscv-test-env/encoding.h b/tests/riscv-test-env/encoding.h new file mode 100644 index 000000000..01889d1a9 --- /dev/null +++ b/tests/riscv-test-env/encoding.h @@ -0,0 +1,5013 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes (02b4866) + */ + +#ifndef RISCV_CSR_ENCODING_H +#define RISCV_CSR_ENCODING_H + +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_UBE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_VS 0x00000600 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_SUM 0x00040000 +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_TVM 0x00100000 +#define MSTATUS_TW 0x00200000 +#define MSTATUS_TSR 0x00400000 +#define MSTATUS32_SD 0x80000000 +#define MSTATUS_UXL 0x0000000300000000 +#define MSTATUS_SXL 0x0000000C00000000 +#define MSTATUS_SBE 0x0000001000000000 +#define MSTATUS_MBE 0x0000002000000000 +#define MSTATUS_GVA 0x0000004000000000 +#define MSTATUS_MPV 0x0000008000000000 +#define MSTATUS64_SD 0x8000000000000000 + +#define MSTATUSH_SBE 0x00000010 +#define MSTATUSH_MBE 0x00000020 +#define MSTATUSH_GVA 0x00000040 +#define MSTATUSH_MPV 0x00000080 + +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define 
SSTATUS_SPIE 0x00000020 +#define SSTATUS_UBE 0x00000040 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_VS 0x00000600 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_SUM 0x00040000 +#define SSTATUS_MXR 0x00080000 +#define SSTATUS32_SD 0x80000000 +#define SSTATUS_UXL 0x0000000300000000 +#define SSTATUS64_SD 0x8000000000000000 + +#define HSTATUS_VSXL 0x300000000 +#define HSTATUS_VTSR 0x00400000 +#define HSTATUS_VTW 0x00200000 +#define HSTATUS_VTVM 0x00100000 +#define HSTATUS_VGEIN 0x0003f000 +#define HSTATUS_HU 0x00000200 +#define HSTATUS_SPVP 0x00000100 +#define HSTATUS_SPV 0x00000080 +#define HSTATUS_GVA 0x00000040 +#define HSTATUS_VSBE 0x00000020 + +#define USTATUS_UIE 0x00000001 +#define USTATUS_UPIE 0x00000010 + +#define MNSTATUS_NMIE 0x00000008 +#define MNSTATUS_MNPP 0x00001800 +#define MNSTATUS_MNPV 0x00000080 + +#define DCSR_XDEBUGVER (3U<<30) +#define DCSR_NDRESET (1<<29) +#define DCSR_FULLRESET (1<<28) +#define DCSR_EBREAKM (1<<15) +#define DCSR_EBREAKH (1<<14) +#define DCSR_EBREAKS (1<<13) +#define DCSR_EBREAKU (1<<12) +#define DCSR_STOPCYCLE (1<<10) +#define DCSR_STOPTIME (1<<9) +#define DCSR_CAUSE (7<<6) +#define DCSR_DEBUGINT (1<<5) +#define DCSR_HALT (1<<3) +#define DCSR_STEP (1<<2) +#define DCSR_PRV (3<<0) + +#define DCSR_CAUSE_NONE 0 +#define DCSR_CAUSE_SWBP 1 +#define DCSR_CAUSE_HWBP 2 +#define DCSR_CAUSE_DEBUGINT 3 +#define DCSR_CAUSE_STEP 4 +#define DCSR_CAUSE_HALT 5 +#define DCSR_CAUSE_GROUP 6 + +#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) +#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) +#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) + +#define MCONTROL_SELECT (1<<19) +#define MCONTROL_TIMING (1<<18) +#define MCONTROL_ACTION (0x3f<<12) +#define MCONTROL_CHAIN (1<<11) +#define MCONTROL_MATCH (0xf<<7) +#define MCONTROL_M (1<<6) +#define MCONTROL_H (1<<5) +#define MCONTROL_S (1<<4) +#define MCONTROL_U (1<<3) +#define MCONTROL_EXECUTE (1<<2) +#define MCONTROL_STORE (1<<1) +#define MCONTROL_LOAD (1<<0) + 
+#define MCONTROL_TYPE_NONE 0 +#define MCONTROL_TYPE_MATCH 2 + +#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 +#define MCONTROL_ACTION_DEBUG_MODE 1 +#define MCONTROL_ACTION_TRACE_START 2 +#define MCONTROL_ACTION_TRACE_STOP 3 +#define MCONTROL_ACTION_TRACE_EMIT 4 + +#define MCONTROL_MATCH_EQUAL 0 +#define MCONTROL_MATCH_NAPOT 1 +#define MCONTROL_MATCH_GE 2 +#define MCONTROL_MATCH_LT 3 +#define MCONTROL_MATCH_MASK_LOW 4 +#define MCONTROL_MATCH_MASK_HIGH 5 + +#define MIP_USIP (1 << IRQ_U_SOFT) +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_VSSIP (1 << IRQ_VS_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_UTIP (1 << IRQ_U_TIMER) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_VSTIP (1 << IRQ_VS_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_UEIP (1 << IRQ_U_EXT) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_VSEIP (1 << IRQ_VS_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) +#define MIP_SGEIP (1 << IRQ_S_GEXT) +#define MIP_LCOFIP (1 << IRQ_LCOF) + +#define MIP_S_MASK (MIP_SSIP | MIP_STIP | MIP_SEIP) +#define MIP_VS_MASK (MIP_VSSIP | MIP_VSTIP | MIP_VSEIP) +#define MIP_HS_MASK (MIP_VS_MASK | MIP_SGEIP) + +#define MIDELEG_FORCED_MASK MIP_HS_MASK + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP + +#define MENVCFG_FIOM 0x00000001 +#define MENVCFG_CBIE 0x00000030 +#define MENVCFG_CBCFE 0x00000040 +#define MENVCFG_CBZE 0x00000080 +#define MENVCFG_HADE 0x2000000000000000 +#define MENVCFG_PBMTE 0x4000000000000000 +#define MENVCFG_STCE 0x8000000000000000 + +#define MENVCFGH_HADE 0x20000000 +#define MENVCFGH_PBMTE 0x40000000 +#define MENVCFGH_STCE 0x80000000 + +#define MSTATEEN0_CS 0x00000001 +#define MSTATEEN0_FCSR 0x00000002 +#define MSTATEEN0_JVT 0x00000004 +#define MSTATEEN0_HCONTEXT 0x0200000000000000 +#define MSTATEEN0_HENVCFG 0x4000000000000000 +#define MSTATEEN_HSTATEEN 0x8000000000000000 + +#define MSTATEEN0H_HCONTEXT 0x02000000 +#define MSTATEEN0H_HENVCFG 0x40000000 +#define MSTATEENH_HSTATEEN 0x80000000 + +#define MHPMEVENT_VUINH 0x0400000000000000 
+#define MHPMEVENT_VSINH 0x0800000000000000 +#define MHPMEVENT_UINH 0x1000000000000000 +#define MHPMEVENT_SINH 0x2000000000000000 +#define MHPMEVENT_MINH 0x4000000000000000 +#define MHPMEVENT_OF 0x8000000000000000 + +#define MHPMEVENTH_VUINH 0x04000000 +#define MHPMEVENTH_VSINH 0x08000000 +#define MHPMEVENTH_UINH 0x10000000 +#define MHPMEVENTH_SINH 0x20000000 +#define MHPMEVENTH_MINH 0x40000000 +#define MHPMEVENTH_OF 0x80000000 + +#define HENVCFG_FIOM 0x00000001 +#define HENVCFG_CBIE 0x00000030 +#define HENVCFG_CBCFE 0x00000040 +#define HENVCFG_CBZE 0x00000080 +#define HENVCFG_HADE 0x2000000000000000 +#define HENVCFG_PBMTE 0x4000000000000000 +#define HENVCFG_STCE 0x8000000000000000 + +#define HENVCFGH_HADE 0x20000000 +#define HENVCFGH_PBMTE 0x40000000 +#define HENVCFGH_STCE 0x80000000 + +#define HSTATEEN0_CS 0x00000001 +#define HSTATEEN0_FCSR 0x00000002 +#define HSTATEEN0_JVT 0x00000004 +#define HSTATEEN0_SCONTEXT 0x0200000000000000 +#define HSTATEEN0_SENVCFG 0x4000000000000000 +#define HSTATEEN_SSTATEEN 0x8000000000000000 + +#define HSTATEEN0H_SCONTEXT 0x02000000 +#define HSTATEEN0H_SENVCFG 0x40000000 +#define HSTATEENH_SSTATEEN 0x80000000 + +#define SENVCFG_FIOM 0x00000001 +#define SENVCFG_CBIE 0x00000030 +#define SENVCFG_CBCFE 0x00000040 +#define SENVCFG_CBZE 0x00000080 + +#define SSTATEEN0_CS 0x00000001 +#define SSTATEEN0_FCSR 0x00000002 +#define SSTATEEN0_JVT 0x00000004 + +#define MSECCFG_MML 0x00000001 +#define MSECCFG_MMWP 0x00000002 +#define MSECCFG_RLB 0x00000004 +#define MSECCFG_USEED 0x00000100 +#define MSECCFG_SSEED 0x00000200 + +/* jvt fields */ +#define JVT_MODE 0x3F +#define JVT_BASE (~0x3F) + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_M 3 + +#define PRV_HS (PRV_S + 1) + +#define SATP32_MODE 0x80000000 +#define SATP32_ASID 0x7FC00000 +#define SATP32_PPN 0x003FFFFF +#define SATP64_MODE 0xF000000000000000 +#define SATP64_ASID 0x0FFFF00000000000 +#define SATP64_PPN 0x00000FFFFFFFFFFF + +#define SATP_MODE_OFF 0 +#define SATP_MODE_SV32 1 +#define 
SATP_MODE_SV39 8 +#define SATP_MODE_SV48 9 +#define SATP_MODE_SV57 10 +#define SATP_MODE_SV64 11 + +#define HGATP32_MODE 0x80000000 +#define HGATP32_VMID 0x1FC00000 +#define HGATP32_PPN 0x003FFFFF + +#define HGATP64_MODE 0xF000000000000000 +#define HGATP64_VMID 0x03FFF00000000000 +#define HGATP64_PPN 0x00000FFFFFFFFFFF + +#define HGATP_MODE_OFF 0 +#define HGATP_MODE_SV32X4 1 +#define HGATP_MODE_SV39X4 8 +#define HGATP_MODE_SV48X4 9 +#define HGATP_MODE_SV57X4 10 + +#define PMP_R 0x01 +#define PMP_W 0x02 +#define PMP_X 0x04 +#define PMP_A 0x18 +#define PMP_L 0x80 +#define PMP_SHIFT 2 + +#define PMP_TOR 0x08 +#define PMP_NA4 0x10 +#define PMP_NAPOT 0x18 + +#define IRQ_U_SOFT 0 +#define IRQ_S_SOFT 1 +#define IRQ_VS_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_U_TIMER 4 +#define IRQ_S_TIMER 5 +#define IRQ_VS_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_U_EXT 8 +#define IRQ_S_EXT 9 +#define IRQ_VS_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_S_GEXT 12 +#define IRQ_COP 12 +#define IRQ_LCOF 13 + +#define DEFAULT_RSTVEC 0x00001000 +#define CLINT_BASE 0x02000000 +#define CLINT_SIZE 0x000c0000 +#define EXT_IO_BASE 0x40000000 +#define DRAM_BASE 0x80000000 + +/* page table entry (PTE) fields */ +#define PTE_V 0x001 /* Valid */ +#define PTE_R 0x002 /* Read */ +#define PTE_W 0x004 /* Write */ +#define PTE_X 0x008 /* Execute */ +#define PTE_U 0x010 /* User */ +#define PTE_G 0x020 /* Global */ +#define PTE_A 0x040 /* Accessed */ +#define PTE_D 0x080 /* Dirty */ +#define PTE_SOFT 0x300 /* Reserved for Software */ +#define PTE_RSVD 0x1FC0000000000000 /* Reserved for future standard use */ +#define PTE_PBMT 0x6000000000000000 /* Svpbmt: Page-based memory types */ +#define PTE_N 0x8000000000000000 /* Svnapot: NAPOT translation contiguity */ +#define PTE_ATTR 0xFFC0000000000000 /* All attributes and reserved bits */ + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) + +#ifdef __riscv + +#if __riscv_xlen == 64 +# define MSTATUS_SD 
MSTATUS64_SD +# define SSTATUS_SD SSTATUS64_SD +# define RISCV_PGLEVEL_BITS 9 +# define SATP_MODE SATP64_MODE +#else +# define MSTATUS_SD MSTATUS32_SD +# define SSTATUS_SD SSTATUS32_SD +# define RISCV_PGLEVEL_BITS 10 +# define SATP_MODE SATP32_MODE +#endif +#define RISCV_PGSHIFT 12 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) + +#ifndef __ASSEMBLER__ + +#ifdef __GNUC__ + +#define read_csr(reg) ({ unsigned long __tmp; \ + asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ + __tmp; }) + +#define write_csr(reg, val) ({ \ + asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) + +#define swap_csr(reg, val) ({ unsigned long __tmp; \ + asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ + __tmp; }) + +#define set_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define clear_csr(reg, bit) ({ unsigned long __tmp; \ + asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ + __tmp; }) + +#define rdtime() read_csr(time) +#define rdcycle() read_csr(cycle) +#define rdinstret() read_csr(instret) + +#endif + +#endif + +#endif + +#endif + +/* Automatically generated by parse_opcodes. 
*/ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +#define MATCH_ADD 0x33 +#define MASK_ADD 0xfe00707f +#define MATCH_ADD16 0x40000077 +#define MASK_ADD16 0xfe00707f +#define MATCH_ADD32 0x40002077 +#define MASK_ADD32 0xfe00707f +#define MATCH_ADD64 0xc0001077 +#define MASK_ADD64 0xfe00707f +#define MATCH_ADD8 0x48000077 +#define MASK_ADD8 0xfe00707f +#define MATCH_ADD_UW 0x800003b +#define MASK_ADD_UW 0xfe00707f +#define MATCH_ADDI 0x13 +#define MASK_ADDI 0x707f +#define MATCH_ADDIW 0x1b +#define MASK_ADDIW 0x707f +#define MATCH_ADDW 0x3b +#define MASK_ADDW 0xfe00707f +#define MATCH_AES32DSI 0x2a000033 +#define MASK_AES32DSI 0x3e00707f +#define MATCH_AES32DSMI 0x2e000033 +#define MASK_AES32DSMI 0x3e00707f +#define MATCH_AES32ESI 0x22000033 +#define MASK_AES32ESI 0x3e00707f +#define MATCH_AES32ESMI 0x26000033 +#define MASK_AES32ESMI 0x3e00707f +#define MATCH_AES64DS 0x3a000033 +#define MASK_AES64DS 0xfe00707f +#define MATCH_AES64DSM 0x3e000033 +#define MASK_AES64DSM 0xfe00707f +#define MATCH_AES64ES 0x32000033 +#define MASK_AES64ES 0xfe00707f +#define MATCH_AES64ESM 0x36000033 +#define MASK_AES64ESM 0xfe00707f +#define MATCH_AES64IM 0x30001013 +#define MASK_AES64IM 0xfff0707f +#define MATCH_AES64KS1I 0x31001013 +#define MASK_AES64KS1I 0xff00707f +#define MATCH_AES64KS2 0x7e000033 +#define MASK_AES64KS2 0xfe00707f +#define MATCH_AMOADD_D 0x302f +#define MASK_AMOADD_D 0xf800707f +#define MATCH_AMOADD_W 0x202f +#define MASK_AMOADD_W 0xf800707f +#define MATCH_AMOAND_D 0x6000302f +#define MASK_AMOAND_D 0xf800707f +#define MATCH_AMOAND_W 0x6000202f +#define MASK_AMOAND_W 0xf800707f +#define MATCH_AMOMAX_D 0xa000302f +#define MASK_AMOMAX_D 0xf800707f +#define MATCH_AMOMAX_W 0xa000202f +#define MASK_AMOMAX_W 0xf800707f +#define MATCH_AMOMAXU_D 0xe000302f +#define MASK_AMOMAXU_D 0xf800707f +#define MATCH_AMOMAXU_W 0xe000202f +#define MASK_AMOMAXU_W 0xf800707f +#define MATCH_AMOMIN_D 0x8000302f +#define MASK_AMOMIN_D 0xf800707f +#define MATCH_AMOMIN_W 0x8000202f 
+#define MASK_AMOMIN_W 0xf800707f +#define MATCH_AMOMINU_D 0xc000302f +#define MASK_AMOMINU_D 0xf800707f +#define MATCH_AMOMINU_W 0xc000202f +#define MASK_AMOMINU_W 0xf800707f +#define MATCH_AMOOR_D 0x4000302f +#define MASK_AMOOR_D 0xf800707f +#define MATCH_AMOOR_W 0x4000202f +#define MASK_AMOOR_W 0xf800707f +#define MATCH_AMOSWAP_D 0x800302f +#define MASK_AMOSWAP_D 0xf800707f +#define MATCH_AMOSWAP_W 0x800202f +#define MASK_AMOSWAP_W 0xf800707f +#define MATCH_AMOXOR_D 0x2000302f +#define MASK_AMOXOR_D 0xf800707f +#define MATCH_AMOXOR_W 0x2000202f +#define MASK_AMOXOR_W 0xf800707f +#define MATCH_AND 0x7033 +#define MASK_AND 0xfe00707f +#define MATCH_ANDI 0x7013 +#define MASK_ANDI 0x707f +#define MATCH_ANDN 0x40007033 +#define MASK_ANDN 0xfe00707f +#define MATCH_AUIPC 0x17 +#define MASK_AUIPC 0x7f +#define MATCH_AVE 0xe0000077 +#define MASK_AVE 0xfe00707f +#define MATCH_BCLR 0x48001033 +#define MASK_BCLR 0xfe00707f +#define MATCH_BCLRI 0x48001013 +#define MASK_BCLRI 0xfc00707f +#define MATCH_BCOMPRESS 0x8006033 +#define MASK_BCOMPRESS 0xfe00707f +#define MATCH_BCOMPRESSW 0x800603b +#define MASK_BCOMPRESSW 0xfe00707f +#define MATCH_BDECOMPRESS 0x48006033 +#define MASK_BDECOMPRESS 0xfe00707f +#define MATCH_BDECOMPRESSW 0x4800603b +#define MASK_BDECOMPRESSW 0xfe00707f +#define MATCH_BEQ 0x63 +#define MASK_BEQ 0x707f +#define MATCH_BEXT 0x48005033 +#define MASK_BEXT 0xfe00707f +#define MATCH_BEXTI 0x48005013 +#define MASK_BEXTI 0xfc00707f +#define MATCH_BFP 0x48007033 +#define MASK_BFP 0xfe00707f +#define MATCH_BFPW 0x4800703b +#define MASK_BFPW 0xfe00707f +#define MATCH_BGE 0x5063 +#define MASK_BGE 0x707f +#define MATCH_BGEU 0x7063 +#define MASK_BGEU 0x707f +#define MATCH_BINV 0x68001033 +#define MASK_BINV 0xfe00707f +#define MATCH_BINVI 0x68001013 +#define MASK_BINVI 0xfc00707f +#define MATCH_BLT 0x4063 +#define MASK_BLT 0x707f +#define MATCH_BLTU 0x6063 +#define MASK_BLTU 0x707f +#define MATCH_BMATFLIP 0x60301013 +#define MASK_BMATFLIP 0xfff0707f +#define 
MATCH_BMATOR 0x8003033 +#define MASK_BMATOR 0xfe00707f +#define MATCH_BMATXOR 0x48003033 +#define MASK_BMATXOR 0xfe00707f +#define MATCH_BNE 0x1063 +#define MASK_BNE 0x707f +#define MATCH_BSET 0x28001033 +#define MASK_BSET 0xfe00707f +#define MATCH_BSETI 0x28001013 +#define MASK_BSETI 0xfc00707f +#define MATCH_C_ADD 0x9002 +#define MASK_C_ADD 0xf003 +#define MATCH_C_ADDI 0x1 +#define MASK_C_ADDI 0xe003 +#define MATCH_C_ADDI16SP 0x6101 +#define MASK_C_ADDI16SP 0xef83 +#define MATCH_C_ADDI4SPN 0x0 +#define MASK_C_ADDI4SPN 0xe003 +#define MATCH_C_ADDIW 0x2001 +#define MASK_C_ADDIW 0xe003 +#define MATCH_C_ADDW 0x9c21 +#define MASK_C_ADDW 0xfc63 +#define MATCH_C_AND 0x8c61 +#define MASK_C_AND 0xfc63 +#define MATCH_C_ANDI 0x8801 +#define MASK_C_ANDI 0xec03 +#define MATCH_C_BEQZ 0xc001 +#define MASK_C_BEQZ 0xe003 +#define MATCH_C_BNEZ 0xe001 +#define MASK_C_BNEZ 0xe003 +#define MATCH_C_EBREAK 0x9002 +#define MASK_C_EBREAK 0xffff +#define MATCH_C_FLD 0x2000 +#define MASK_C_FLD 0xe003 +#define MATCH_C_FLDSP 0x2002 +#define MASK_C_FLDSP 0xe003 +#define MATCH_C_FLW 0x6000 +#define MASK_C_FLW 0xe003 +#define MATCH_C_FLWSP 0x6002 +#define MASK_C_FLWSP 0xe003 +#define MATCH_C_FSD 0xa000 +#define MASK_C_FSD 0xe003 +#define MATCH_C_FSDSP 0xa002 +#define MASK_C_FSDSP 0xe003 +#define MATCH_C_FSW 0xe000 +#define MASK_C_FSW 0xe003 +#define MATCH_C_FSWSP 0xe002 +#define MASK_C_FSWSP 0xe003 +#define MATCH_C_J 0xa001 +#define MASK_C_J 0xe003 +#define MATCH_C_JAL 0x2001 +#define MASK_C_JAL 0xe003 +#define MATCH_C_JALR 0x9002 +#define MASK_C_JALR 0xf07f +#define MATCH_C_JR 0x8002 +#define MASK_C_JR 0xf07f +#define MATCH_C_LBU 0x8000 +#define MASK_C_LBU 0xfc03 +#define MATCH_C_LD 0x6000 +#define MASK_C_LD 0xe003 +#define MATCH_C_LDSP 0x6002 +#define MASK_C_LDSP 0xe003 +#define MATCH_C_LH 0x8440 +#define MASK_C_LH 0xfc43 +#define MATCH_C_LHU 0x8400 +#define MASK_C_LHU 0xfc43 +#define MATCH_C_LI 0x4001 +#define MASK_C_LI 0xe003 +#define MATCH_C_LUI 0x6001 +#define MASK_C_LUI 0xe003 +#define 
MATCH_C_LW 0x4000 +#define MASK_C_LW 0xe003 +#define MATCH_C_LWSP 0x4002 +#define MASK_C_LWSP 0xe003 +#define MATCH_C_MUL 0x9c41 +#define MASK_C_MUL 0xfc63 +#define MATCH_C_MV 0x8002 +#define MASK_C_MV 0xf003 +#define MATCH_C_NOP 0x1 +#define MASK_C_NOP 0xef83 +#define MATCH_C_NOT 0x9c75 +#define MASK_C_NOT 0xfc7f +#define MATCH_C_OR 0x8c41 +#define MASK_C_OR 0xfc63 +#define MATCH_C_SB 0x8800 +#define MASK_C_SB 0xfc03 +#define MATCH_C_SD 0xe000 +#define MASK_C_SD 0xe003 +#define MATCH_C_SDSP 0xe002 +#define MASK_C_SDSP 0xe003 +#define MATCH_C_SEXT_B 0x9c65 +#define MASK_C_SEXT_B 0xfc7f +#define MATCH_C_SEXT_H 0x9c6d +#define MASK_C_SEXT_H 0xfc7f +#define MATCH_C_SH 0x8c00 +#define MASK_C_SH 0xfc43 +#define MATCH_C_SLLI 0x2 +#define MASK_C_SLLI 0xe003 +#define MATCH_C_SRAI 0x8401 +#define MASK_C_SRAI 0xec03 +#define MATCH_C_SRLI 0x8001 +#define MASK_C_SRLI 0xec03 +#define MATCH_C_SUB 0x8c01 +#define MASK_C_SUB 0xfc63 +#define MATCH_C_SUBW 0x9c01 +#define MASK_C_SUBW 0xfc63 +#define MATCH_C_SW 0xc000 +#define MASK_C_SW 0xe003 +#define MATCH_C_SWSP 0xc002 +#define MASK_C_SWSP 0xe003 +#define MATCH_C_XOR 0x8c21 +#define MASK_C_XOR 0xfc63 +#define MATCH_C_ZEXT_B 0x9c61 +#define MASK_C_ZEXT_B 0xfc7f +#define MATCH_C_ZEXT_H 0x9c69 +#define MASK_C_ZEXT_H 0xfc7f +#define MATCH_C_ZEXT_W 0x9c71 +#define MASK_C_ZEXT_W 0xfc7f +#define MATCH_CBO_CLEAN 0x10200f +#define MASK_CBO_CLEAN 0xfff07fff +#define MATCH_CBO_FLUSH 0x20200f +#define MASK_CBO_FLUSH 0xfff07fff +#define MATCH_CBO_INVAL 0x200f +#define MASK_CBO_INVAL 0xfff07fff +#define MATCH_CBO_ZERO 0x40200f +#define MASK_CBO_ZERO 0xfff07fff +#define MATCH_CLMUL 0xa001033 +#define MASK_CLMUL 0xfe00707f +#define MATCH_CLMULH 0xa003033 +#define MASK_CLMULH 0xfe00707f +#define MATCH_CLMULR 0xa002033 +#define MASK_CLMULR 0xfe00707f +#define MATCH_CLRS16 0xae800077 +#define MASK_CLRS16 0xfff0707f +#define MATCH_CLRS32 0xaf800077 +#define MASK_CLRS32 0xfff0707f +#define MATCH_CLRS8 0xae000077 +#define MASK_CLRS8 0xfff0707f +#define 
MATCH_CLZ 0x60001013 +#define MASK_CLZ 0xfff0707f +#define MATCH_CLZ16 0xae900077 +#define MASK_CLZ16 0xfff0707f +#define MATCH_CLZ32 0xaf900077 +#define MASK_CLZ32 0xfff0707f +#define MATCH_CLZ8 0xae100077 +#define MASK_CLZ8 0xfff0707f +#define MATCH_CLZW 0x6000101b +#define MASK_CLZW 0xfff0707f +#define MATCH_CM_JALT 0xa002 +#define MASK_CM_JALT 0xfc03 +#define MATCH_CM_MVA01S 0xac62 +#define MASK_CM_MVA01S 0xfc63 +#define MATCH_CM_MVSA01 0xac22 +#define MASK_CM_MVSA01 0xfc63 +#define MATCH_CM_POP 0xba02 +#define MASK_CM_POP 0xff03 +#define MATCH_CM_POPRET 0xbe02 +#define MASK_CM_POPRET 0xff03 +#define MATCH_CM_POPRETZ 0xbc02 +#define MASK_CM_POPRETZ 0xff03 +#define MATCH_CM_PUSH 0xb802 +#define MASK_CM_PUSH 0xff03 +#define MATCH_CMIX 0x6001033 +#define MASK_CMIX 0x600707f +#define MATCH_CMOV 0x6005033 +#define MASK_CMOV 0x600707f +#define MATCH_CMPEQ16 0x4c000077 +#define MASK_CMPEQ16 0xfe00707f +#define MATCH_CMPEQ8 0x4e000077 +#define MASK_CMPEQ8 0xfe00707f +#define MATCH_CPOP 0x60201013 +#define MASK_CPOP 0xfff0707f +#define MATCH_CPOPW 0x6020101b +#define MASK_CPOPW 0xfff0707f +#define MATCH_CRAS16 0x44000077 +#define MASK_CRAS16 0xfe00707f +#define MATCH_CRAS32 0x44002077 +#define MASK_CRAS32 0xfe00707f +#define MATCH_CRC32_B 0x61001013 +#define MASK_CRC32_B 0xfff0707f +#define MATCH_CRC32_D 0x61301013 +#define MASK_CRC32_D 0xfff0707f +#define MATCH_CRC32_H 0x61101013 +#define MASK_CRC32_H 0xfff0707f +#define MATCH_CRC32_W 0x61201013 +#define MASK_CRC32_W 0xfff0707f +#define MATCH_CRC32C_B 0x61801013 +#define MASK_CRC32C_B 0xfff0707f +#define MATCH_CRC32C_D 0x61b01013 +#define MASK_CRC32C_D 0xfff0707f +#define MATCH_CRC32C_H 0x61901013 +#define MASK_CRC32C_H 0xfff0707f +#define MATCH_CRC32C_W 0x61a01013 +#define MASK_CRC32C_W 0xfff0707f +#define MATCH_CRSA16 0x46000077 +#define MASK_CRSA16 0xfe00707f +#define MATCH_CRSA32 0x46002077 +#define MASK_CRSA32 0xfe00707f +#define MATCH_CSRRC 0x3073 +#define MASK_CSRRC 0x707f +#define MATCH_CSRRCI 0x7073 +#define 
MASK_CSRRCI 0x707f +#define MATCH_CSRRS 0x2073 +#define MASK_CSRRS 0x707f +#define MATCH_CSRRSI 0x6073 +#define MASK_CSRRSI 0x707f +#define MATCH_CSRRW 0x1073 +#define MASK_CSRRW 0x707f +#define MATCH_CSRRWI 0x5073 +#define MASK_CSRRWI 0x707f +#define MATCH_CTZ 0x60101013 +#define MASK_CTZ 0xfff0707f +#define MATCH_CTZW 0x6010101b +#define MASK_CTZW 0xfff0707f +#define MATCH_CZERO_EQZ 0xe005033 +#define MASK_CZERO_EQZ 0xfe00707f +#define MATCH_CZERO_NEZ 0xe007033 +#define MASK_CZERO_NEZ 0xfe00707f +#define MATCH_DIV 0x2004033 +#define MASK_DIV 0xfe00707f +#define MATCH_DIVU 0x2005033 +#define MASK_DIVU 0xfe00707f +#define MATCH_DIVUW 0x200503b +#define MASK_DIVUW 0xfe00707f +#define MATCH_DIVW 0x200403b +#define MASK_DIVW 0xfe00707f +#define MATCH_DRET 0x7b200073 +#define MASK_DRET 0xffffffff +#define MATCH_EBREAK 0x100073 +#define MASK_EBREAK 0xffffffff +#define MATCH_ECALL 0x73 +#define MASK_ECALL 0xffffffff +#define MATCH_FADD_D 0x2000053 +#define MASK_FADD_D 0xfe00007f +#define MATCH_FADD_H 0x4000053 +#define MASK_FADD_H 0xfe00007f +#define MATCH_FADD_Q 0x6000053 +#define MASK_FADD_Q 0xfe00007f +#define MATCH_FADD_S 0x53 +#define MASK_FADD_S 0xfe00007f +#define MATCH_FCLASS_D 0xe2001053 +#define MASK_FCLASS_D 0xfff0707f +#define MATCH_FCLASS_H 0xe4001053 +#define MASK_FCLASS_H 0xfff0707f +#define MATCH_FCLASS_Q 0xe6001053 +#define MASK_FCLASS_Q 0xfff0707f +#define MATCH_FCLASS_S 0xe0001053 +#define MASK_FCLASS_S 0xfff0707f +#define MATCH_FCVT_D_H 0x42200053 +#define MASK_FCVT_D_H 0xfff0007f +#define MATCH_FCVT_D_L 0xd2200053 +#define MASK_FCVT_D_L 0xfff0007f +#define MATCH_FCVT_D_LU 0xd2300053 +#define MASK_FCVT_D_LU 0xfff0007f +#define MATCH_FCVT_D_Q 0x42300053 +#define MASK_FCVT_D_Q 0xfff0007f +#define MATCH_FCVT_D_S 0x42000053 +#define MASK_FCVT_D_S 0xfff0007f +#define MATCH_FCVT_D_W 0xd2000053 +#define MASK_FCVT_D_W 0xfff0007f +#define MATCH_FCVT_D_WU 0xd2100053 +#define MASK_FCVT_D_WU 0xfff0007f +#define MATCH_FCVT_H_D 0x44100053 +#define MASK_FCVT_H_D 
0xfff0007f +#define MATCH_FCVT_H_L 0xd4200053 +#define MASK_FCVT_H_L 0xfff0007f +#define MATCH_FCVT_H_LU 0xd4300053 +#define MASK_FCVT_H_LU 0xfff0007f +#define MATCH_FCVT_H_Q 0x44300053 +#define MASK_FCVT_H_Q 0xfff0007f +#define MATCH_FCVT_H_S 0x44000053 +#define MASK_FCVT_H_S 0xfff0007f +#define MATCH_FCVT_H_W 0xd4000053 +#define MASK_FCVT_H_W 0xfff0007f +#define MATCH_FCVT_H_WU 0xd4100053 +#define MASK_FCVT_H_WU 0xfff0007f +#define MATCH_FCVT_L_D 0xc2200053 +#define MASK_FCVT_L_D 0xfff0007f +#define MATCH_FCVT_L_H 0xc4200053 +#define MASK_FCVT_L_H 0xfff0007f +#define MATCH_FCVT_L_Q 0xc6200053 +#define MASK_FCVT_L_Q 0xfff0007f +#define MATCH_FCVT_L_S 0xc0200053 +#define MASK_FCVT_L_S 0xfff0007f +#define MATCH_FCVT_LU_D 0xc2300053 +#define MASK_FCVT_LU_D 0xfff0007f +#define MATCH_FCVT_LU_H 0xc4300053 +#define MASK_FCVT_LU_H 0xfff0007f +#define MATCH_FCVT_LU_Q 0xc6300053 +#define MASK_FCVT_LU_Q 0xfff0007f +#define MATCH_FCVT_LU_S 0xc0300053 +#define MASK_FCVT_LU_S 0xfff0007f +#define MATCH_FCVT_Q_D 0x46100053 +#define MASK_FCVT_Q_D 0xfff0007f +#define MATCH_FCVT_Q_H 0x46200053 +#define MASK_FCVT_Q_H 0xfff0007f +#define MATCH_FCVT_Q_L 0xd6200053 +#define MASK_FCVT_Q_L 0xfff0007f +#define MATCH_FCVT_Q_LU 0xd6300053 +#define MASK_FCVT_Q_LU 0xfff0007f +#define MATCH_FCVT_Q_S 0x46000053 +#define MASK_FCVT_Q_S 0xfff0007f +#define MATCH_FCVT_Q_W 0xd6000053 +#define MASK_FCVT_Q_W 0xfff0007f +#define MATCH_FCVT_Q_WU 0xd6100053 +#define MASK_FCVT_Q_WU 0xfff0007f +#define MATCH_FCVT_S_D 0x40100053 +#define MASK_FCVT_S_D 0xfff0007f +#define MATCH_FCVT_S_H 0x40200053 +#define MASK_FCVT_S_H 0xfff0007f +#define MATCH_FCVT_S_L 0xd0200053 +#define MASK_FCVT_S_L 0xfff0007f +#define MATCH_FCVT_S_LU 0xd0300053 +#define MASK_FCVT_S_LU 0xfff0007f +#define MATCH_FCVT_S_Q 0x40300053 +#define MASK_FCVT_S_Q 0xfff0007f +#define MATCH_FCVT_S_W 0xd0000053 +#define MASK_FCVT_S_W 0xfff0007f +#define MATCH_FCVT_S_WU 0xd0100053 +#define MASK_FCVT_S_WU 0xfff0007f +#define MATCH_FCVT_W_D 0xc2000053 
+#define MASK_FCVT_W_D 0xfff0007f +#define MATCH_FCVT_W_H 0xc4000053 +#define MASK_FCVT_W_H 0xfff0007f +#define MATCH_FCVT_W_Q 0xc6000053 +#define MASK_FCVT_W_Q 0xfff0007f +#define MATCH_FCVT_W_S 0xc0000053 +#define MASK_FCVT_W_S 0xfff0007f +#define MATCH_FCVT_WU_D 0xc2100053 +#define MASK_FCVT_WU_D 0xfff0007f +#define MATCH_FCVT_WU_H 0xc4100053 +#define MASK_FCVT_WU_H 0xfff0007f +#define MATCH_FCVT_WU_Q 0xc6100053 +#define MASK_FCVT_WU_Q 0xfff0007f +#define MATCH_FCVT_WU_S 0xc0100053 +#define MASK_FCVT_WU_S 0xfff0007f +#define MATCH_FDIV_D 0x1a000053 +#define MASK_FDIV_D 0xfe00007f +#define MATCH_FDIV_H 0x1c000053 +#define MASK_FDIV_H 0xfe00007f +#define MATCH_FDIV_Q 0x1e000053 +#define MASK_FDIV_Q 0xfe00007f +#define MATCH_FDIV_S 0x18000053 +#define MASK_FDIV_S 0xfe00007f +#define MATCH_FENCE 0xf +#define MASK_FENCE 0x707f +#define MATCH_FENCE_I 0x100f +#define MASK_FENCE_I 0x707f +#define MATCH_FEQ_D 0xa2002053 +#define MASK_FEQ_D 0xfe00707f +#define MATCH_FEQ_H 0xa4002053 +#define MASK_FEQ_H 0xfe00707f +#define MATCH_FEQ_Q 0xa6002053 +#define MASK_FEQ_Q 0xfe00707f +#define MATCH_FEQ_S 0xa0002053 +#define MASK_FEQ_S 0xfe00707f +#define MATCH_FLD 0x3007 +#define MASK_FLD 0x707f +#define MATCH_FLE_D 0xa2000053 +#define MASK_FLE_D 0xfe00707f +#define MATCH_FLE_H 0xa4000053 +#define MASK_FLE_H 0xfe00707f +#define MATCH_FLE_Q 0xa6000053 +#define MASK_FLE_Q 0xfe00707f +#define MATCH_FLE_S 0xa0000053 +#define MASK_FLE_S 0xfe00707f +#define MATCH_FLH 0x1007 +#define MASK_FLH 0x707f +#define MATCH_FLQ 0x4007 +#define MASK_FLQ 0x707f +#define MATCH_FLT_D 0xa2001053 +#define MASK_FLT_D 0xfe00707f +#define MATCH_FLT_H 0xa4001053 +#define MASK_FLT_H 0xfe00707f +#define MATCH_FLT_Q 0xa6001053 +#define MASK_FLT_Q 0xfe00707f +#define MATCH_FLT_S 0xa0001053 +#define MASK_FLT_S 0xfe00707f +#define MATCH_FLW 0x2007 +#define MASK_FLW 0x707f +#define MATCH_FMADD_D 0x2000043 +#define MASK_FMADD_D 0x600007f +#define MATCH_FMADD_H 0x4000043 +#define MASK_FMADD_H 0x600007f +#define 
MATCH_FMADD_Q 0x6000043 +#define MASK_FMADD_Q 0x600007f +#define MATCH_FMADD_S 0x43 +#define MASK_FMADD_S 0x600007f +#define MATCH_FMAX_D 0x2a001053 +#define MASK_FMAX_D 0xfe00707f +#define MATCH_FMAX_H 0x2c001053 +#define MASK_FMAX_H 0xfe00707f +#define MATCH_FMAX_Q 0x2e001053 +#define MASK_FMAX_Q 0xfe00707f +#define MATCH_FMAX_S 0x28001053 +#define MASK_FMAX_S 0xfe00707f +#define MATCH_FMIN_D 0x2a000053 +#define MASK_FMIN_D 0xfe00707f +#define MATCH_FMIN_H 0x2c000053 +#define MASK_FMIN_H 0xfe00707f +#define MATCH_FMIN_Q 0x2e000053 +#define MASK_FMIN_Q 0xfe00707f +#define MATCH_FMIN_S 0x28000053 +#define MASK_FMIN_S 0xfe00707f +#define MATCH_FMSUB_D 0x2000047 +#define MASK_FMSUB_D 0x600007f +#define MATCH_FMSUB_H 0x4000047 +#define MASK_FMSUB_H 0x600007f +#define MATCH_FMSUB_Q 0x6000047 +#define MASK_FMSUB_Q 0x600007f +#define MATCH_FMSUB_S 0x47 +#define MASK_FMSUB_S 0x600007f +#define MATCH_FMUL_D 0x12000053 +#define MASK_FMUL_D 0xfe00007f +#define MATCH_FMUL_H 0x14000053 +#define MASK_FMUL_H 0xfe00007f +#define MATCH_FMUL_Q 0x16000053 +#define MASK_FMUL_Q 0xfe00007f +#define MATCH_FMUL_S 0x10000053 +#define MASK_FMUL_S 0xfe00007f +#define MATCH_FMV_D_X 0xf2000053 +#define MASK_FMV_D_X 0xfff0707f +#define MATCH_FMV_H_X 0xf4000053 +#define MASK_FMV_H_X 0xfff0707f +#define MATCH_FMV_W_X 0xf0000053 +#define MASK_FMV_W_X 0xfff0707f +#define MATCH_FMV_X_D 0xe2000053 +#define MASK_FMV_X_D 0xfff0707f +#define MATCH_FMV_X_H 0xe4000053 +#define MASK_FMV_X_H 0xfff0707f +#define MATCH_FMV_X_W 0xe0000053 +#define MASK_FMV_X_W 0xfff0707f +#define MATCH_FNMADD_D 0x200004f +#define MASK_FNMADD_D 0x600007f +#define MATCH_FNMADD_H 0x400004f +#define MASK_FNMADD_H 0x600007f +#define MATCH_FNMADD_Q 0x600004f +#define MASK_FNMADD_Q 0x600007f +#define MATCH_FNMADD_S 0x4f +#define MASK_FNMADD_S 0x600007f +#define MATCH_FNMSUB_D 0x200004b +#define MASK_FNMSUB_D 0x600007f +#define MATCH_FNMSUB_H 0x400004b +#define MASK_FNMSUB_H 0x600007f +#define MATCH_FNMSUB_Q 0x600004b +#define 
MASK_FNMSUB_Q 0x600007f +#define MATCH_FNMSUB_S 0x4b +#define MASK_FNMSUB_S 0x600007f +#define MATCH_FSD 0x3027 +#define MASK_FSD 0x707f +#define MATCH_FSGNJ_D 0x22000053 +#define MASK_FSGNJ_D 0xfe00707f +#define MATCH_FSGNJ_H 0x24000053 +#define MASK_FSGNJ_H 0xfe00707f +#define MATCH_FSGNJ_Q 0x26000053 +#define MASK_FSGNJ_Q 0xfe00707f +#define MATCH_FSGNJ_S 0x20000053 +#define MASK_FSGNJ_S 0xfe00707f +#define MATCH_FSGNJN_D 0x22001053 +#define MASK_FSGNJN_D 0xfe00707f +#define MATCH_FSGNJN_H 0x24001053 +#define MASK_FSGNJN_H 0xfe00707f +#define MATCH_FSGNJN_Q 0x26001053 +#define MASK_FSGNJN_Q 0xfe00707f +#define MATCH_FSGNJN_S 0x20001053 +#define MASK_FSGNJN_S 0xfe00707f +#define MATCH_FSGNJX_D 0x22002053 +#define MASK_FSGNJX_D 0xfe00707f +#define MATCH_FSGNJX_H 0x24002053 +#define MASK_FSGNJX_H 0xfe00707f +#define MATCH_FSGNJX_Q 0x26002053 +#define MASK_FSGNJX_Q 0xfe00707f +#define MATCH_FSGNJX_S 0x20002053 +#define MASK_FSGNJX_S 0xfe00707f +#define MATCH_FSH 0x1027 +#define MASK_FSH 0x707f +#define MATCH_FSL 0x4001033 +#define MASK_FSL 0x600707f +#define MATCH_FSLW 0x400103b +#define MASK_FSLW 0x600707f +#define MATCH_FSQ 0x4027 +#define MASK_FSQ 0x707f +#define MATCH_FSQRT_D 0x5a000053 +#define MASK_FSQRT_D 0xfff0007f +#define MATCH_FSQRT_H 0x5c000053 +#define MASK_FSQRT_H 0xfff0007f +#define MATCH_FSQRT_Q 0x5e000053 +#define MASK_FSQRT_Q 0xfff0007f +#define MATCH_FSQRT_S 0x58000053 +#define MASK_FSQRT_S 0xfff0007f +#define MATCH_FSR 0x4005033 +#define MASK_FSR 0x600707f +#define MATCH_FSRI 0x4005013 +#define MASK_FSRI 0x400707f +#define MATCH_FSRIW 0x400501b +#define MASK_FSRIW 0x600707f +#define MATCH_FSRW 0x400503b +#define MASK_FSRW 0x600707f +#define MATCH_FSUB_D 0xa000053 +#define MASK_FSUB_D 0xfe00007f +#define MATCH_FSUB_H 0xc000053 +#define MASK_FSUB_H 0xfe00007f +#define MATCH_FSUB_Q 0xe000053 +#define MASK_FSUB_Q 0xfe00007f +#define MATCH_FSUB_S 0x8000053 +#define MASK_FSUB_S 0xfe00007f +#define MATCH_FSW 0x2027 +#define MASK_FSW 0x707f +#define 
MATCH_GORC 0x28005033 +#define MASK_GORC 0xfe00707f +#define MATCH_GORCI 0x28005013 +#define MASK_GORCI 0xfc00707f +#define MATCH_GORCIW 0x2800501b +#define MASK_GORCIW 0xfe00707f +#define MATCH_GORCW 0x2800503b +#define MASK_GORCW 0xfe00707f +#define MATCH_GREV 0x68005033 +#define MASK_GREV 0xfe00707f +#define MATCH_GREVI 0x68005013 +#define MASK_GREVI 0xfc00707f +#define MATCH_GREVIW 0x6800501b +#define MASK_GREVIW 0xfe00707f +#define MATCH_GREVW 0x6800503b +#define MASK_GREVW 0xfe00707f +#define MATCH_HFENCE_GVMA 0x62000073 +#define MASK_HFENCE_GVMA 0xfe007fff +#define MATCH_HFENCE_VVMA 0x22000073 +#define MASK_HFENCE_VVMA 0xfe007fff +#define MATCH_HINVAL_GVMA 0x66000073 +#define MASK_HINVAL_GVMA 0xfe007fff +#define MATCH_HINVAL_VVMA 0x26000073 +#define MASK_HINVAL_VVMA 0xfe007fff +#define MATCH_HLV_B 0x60004073 +#define MASK_HLV_B 0xfff0707f +#define MATCH_HLV_BU 0x60104073 +#define MASK_HLV_BU 0xfff0707f +#define MATCH_HLV_D 0x6c004073 +#define MASK_HLV_D 0xfff0707f +#define MATCH_HLV_H 0x64004073 +#define MASK_HLV_H 0xfff0707f +#define MATCH_HLV_HU 0x64104073 +#define MASK_HLV_HU 0xfff0707f +#define MATCH_HLV_W 0x68004073 +#define MASK_HLV_W 0xfff0707f +#define MATCH_HLV_WU 0x68104073 +#define MASK_HLV_WU 0xfff0707f +#define MATCH_HLVX_HU 0x64304073 +#define MASK_HLVX_HU 0xfff0707f +#define MATCH_HLVX_WU 0x68304073 +#define MASK_HLVX_WU 0xfff0707f +#define MATCH_HSV_B 0x62004073 +#define MASK_HSV_B 0xfe007fff +#define MATCH_HSV_D 0x6e004073 +#define MASK_HSV_D 0xfe007fff +#define MATCH_HSV_H 0x66004073 +#define MASK_HSV_H 0xfe007fff +#define MATCH_HSV_W 0x6a004073 +#define MASK_HSV_W 0xfe007fff +#define MATCH_INSB 0xac000077 +#define MASK_INSB 0xff80707f +#define MATCH_JAL 0x6f +#define MASK_JAL 0x7f +#define MATCH_JALR 0x67 +#define MASK_JALR 0x707f +#define MATCH_KABS16 0xad100077 +#define MASK_KABS16 0xfff0707f +#define MATCH_KABS32 0xad200077 +#define MASK_KABS32 0xfff0707f +#define MATCH_KABS8 0xad000077 +#define MASK_KABS8 0xfff0707f +#define 
MATCH_KABSW 0xad400077 +#define MASK_KABSW 0xfff0707f +#define MATCH_KADD16 0x10000077 +#define MASK_KADD16 0xfe00707f +#define MATCH_KADD32 0x10002077 +#define MASK_KADD32 0xfe00707f +#define MATCH_KADD64 0x90001077 +#define MASK_KADD64 0xfe00707f +#define MATCH_KADD8 0x18000077 +#define MASK_KADD8 0xfe00707f +#define MATCH_KADDH 0x4001077 +#define MASK_KADDH 0xfe00707f +#define MATCH_KADDW 0x1077 +#define MASK_KADDW 0xfe00707f +#define MATCH_KCRAS16 0x14000077 +#define MASK_KCRAS16 0xfe00707f +#define MATCH_KCRAS32 0x14002077 +#define MASK_KCRAS32 0xfe00707f +#define MATCH_KCRSA16 0x16000077 +#define MASK_KCRSA16 0xfe00707f +#define MATCH_KCRSA32 0x16002077 +#define MASK_KCRSA32 0xfe00707f +#define MATCH_KDMABB 0xd2001077 +#define MASK_KDMABB 0xfe00707f +#define MATCH_KDMABB16 0xd8001077 +#define MASK_KDMABB16 0xfe00707f +#define MATCH_KDMABT 0xe2001077 +#define MASK_KDMABT 0xfe00707f +#define MATCH_KDMABT16 0xe8001077 +#define MASK_KDMABT16 0xfe00707f +#define MATCH_KDMATT 0xf2001077 +#define MASK_KDMATT 0xfe00707f +#define MATCH_KDMATT16 0xf8001077 +#define MASK_KDMATT16 0xfe00707f +#define MATCH_KDMBB 0xa001077 +#define MASK_KDMBB 0xfe00707f +#define MATCH_KDMBB16 0xda001077 +#define MASK_KDMBB16 0xfe00707f +#define MATCH_KDMBT 0x1a001077 +#define MASK_KDMBT 0xfe00707f +#define MATCH_KDMBT16 0xea001077 +#define MASK_KDMBT16 0xfe00707f +#define MATCH_KDMTT 0x2a001077 +#define MASK_KDMTT 0xfe00707f +#define MATCH_KDMTT16 0xfa001077 +#define MASK_KDMTT16 0xfe00707f +#define MATCH_KHM16 0x86000077 +#define MASK_KHM16 0xfe00707f +#define MATCH_KHM8 0x8e000077 +#define MASK_KHM8 0xfe00707f +#define MATCH_KHMBB 0xc001077 +#define MASK_KHMBB 0xfe00707f +#define MATCH_KHMBB16 0xdc001077 +#define MASK_KHMBB16 0xfe00707f +#define MATCH_KHMBT 0x1c001077 +#define MASK_KHMBT 0xfe00707f +#define MATCH_KHMBT16 0xec001077 +#define MASK_KHMBT16 0xfe00707f +#define MATCH_KHMTT 0x2c001077 +#define MASK_KHMTT 0xfe00707f +#define MATCH_KHMTT16 0xfc001077 +#define MASK_KHMTT16 
0xfe00707f +#define MATCH_KHMX16 0x96000077 +#define MASK_KHMX16 0xfe00707f +#define MATCH_KHMX8 0x9e000077 +#define MASK_KHMX8 0xfe00707f +#define MATCH_KMABB 0x5a001077 +#define MASK_KMABB 0xfe00707f +#define MATCH_KMABB32 0x5a002077 +#define MASK_KMABB32 0xfe00707f +#define MATCH_KMABT 0x6a001077 +#define MASK_KMABT 0xfe00707f +#define MATCH_KMABT32 0x6a002077 +#define MASK_KMABT32 0xfe00707f +#define MATCH_KMADA 0x48001077 +#define MASK_KMADA 0xfe00707f +#define MATCH_KMADRS 0x6c001077 +#define MASK_KMADRS 0xfe00707f +#define MATCH_KMADRS32 0x6c002077 +#define MASK_KMADRS32 0xfe00707f +#define MATCH_KMADS 0x5c001077 +#define MASK_KMADS 0xfe00707f +#define MATCH_KMADS32 0x5c002077 +#define MASK_KMADS32 0xfe00707f +#define MATCH_KMAR64 0x94001077 +#define MASK_KMAR64 0xfe00707f +#define MATCH_KMATT 0x7a001077 +#define MASK_KMATT 0xfe00707f +#define MATCH_KMATT32 0x7a002077 +#define MASK_KMATT32 0xfe00707f +#define MATCH_KMAXDA 0x4a001077 +#define MASK_KMAXDA 0xfe00707f +#define MATCH_KMAXDA32 0x4a002077 +#define MASK_KMAXDA32 0xfe00707f +#define MATCH_KMAXDS 0x7c001077 +#define MASK_KMAXDS 0xfe00707f +#define MATCH_KMAXDS32 0x7c002077 +#define MASK_KMAXDS32 0xfe00707f +#define MATCH_KMDA 0x38001077 +#define MASK_KMDA 0xfe00707f +#define MATCH_KMDA32 0x38002077 +#define MASK_KMDA32 0xfe00707f +#define MATCH_KMMAC 0x60001077 +#define MASK_KMMAC 0xfe00707f +#define MATCH_KMMAC_U 0x70001077 +#define MASK_KMMAC_U 0xfe00707f +#define MATCH_KMMAWB 0x46001077 +#define MASK_KMMAWB 0xfe00707f +#define MATCH_KMMAWB2 0xce001077 +#define MASK_KMMAWB2 0xfe00707f +#define MATCH_KMMAWB2_U 0xde001077 +#define MASK_KMMAWB2_U 0xfe00707f +#define MATCH_KMMAWB_U 0x56001077 +#define MASK_KMMAWB_U 0xfe00707f +#define MATCH_KMMAWT 0x66001077 +#define MASK_KMMAWT 0xfe00707f +#define MATCH_KMMAWT2 0xee001077 +#define MASK_KMMAWT2 0xfe00707f +#define MATCH_KMMAWT2_U 0xfe001077 +#define MASK_KMMAWT2_U 0xfe00707f +#define MATCH_KMMAWT_U 0x76001077 +#define MASK_KMMAWT_U 0xfe00707f +#define 
MATCH_KMMSB 0x42001077 +#define MASK_KMMSB 0xfe00707f +#define MATCH_KMMSB_U 0x52001077 +#define MASK_KMMSB_U 0xfe00707f +#define MATCH_KMMWB2 0x8e001077 +#define MASK_KMMWB2 0xfe00707f +#define MATCH_KMMWB2_U 0x9e001077 +#define MASK_KMMWB2_U 0xfe00707f +#define MATCH_KMMWT2 0xae001077 +#define MASK_KMMWT2 0xfe00707f +#define MATCH_KMMWT2_U 0xbe001077 +#define MASK_KMMWT2_U 0xfe00707f +#define MATCH_KMSDA 0x4c001077 +#define MASK_KMSDA 0xfe00707f +#define MATCH_KMSDA32 0x4c002077 +#define MASK_KMSDA32 0xfe00707f +#define MATCH_KMSR64 0x96001077 +#define MASK_KMSR64 0xfe00707f +#define MATCH_KMSXDA 0x4e001077 +#define MASK_KMSXDA 0xfe00707f +#define MATCH_KMSXDA32 0x4e002077 +#define MASK_KMSXDA32 0xfe00707f +#define MATCH_KMXDA 0x3a001077 +#define MASK_KMXDA 0xfe00707f +#define MATCH_KMXDA32 0x3a002077 +#define MASK_KMXDA32 0xfe00707f +#define MATCH_KSLL16 0x64000077 +#define MASK_KSLL16 0xfe00707f +#define MATCH_KSLL32 0x64002077 +#define MASK_KSLL32 0xfe00707f +#define MATCH_KSLL8 0x6c000077 +#define MASK_KSLL8 0xfe00707f +#define MATCH_KSLLI16 0x75000077 +#define MASK_KSLLI16 0xff00707f +#define MATCH_KSLLI32 0x84002077 +#define MASK_KSLLI32 0xfe00707f +#define MATCH_KSLLI8 0x7c800077 +#define MASK_KSLLI8 0xff80707f +#define MATCH_KSLLIW 0x36001077 +#define MASK_KSLLIW 0xfe00707f +#define MATCH_KSLLW 0x26001077 +#define MASK_KSLLW 0xfe00707f +#define MATCH_KSLRA16 0x56000077 +#define MASK_KSLRA16 0xfe00707f +#define MATCH_KSLRA16_U 0x66000077 +#define MASK_KSLRA16_U 0xfe00707f +#define MATCH_KSLRA32 0x56002077 +#define MASK_KSLRA32 0xfe00707f +#define MATCH_KSLRA32_U 0x66002077 +#define MASK_KSLRA32_U 0xfe00707f +#define MATCH_KSLRA8 0x5e000077 +#define MASK_KSLRA8 0xfe00707f +#define MATCH_KSLRA8_U 0x6e000077 +#define MASK_KSLRA8_U 0xfe00707f +#define MATCH_KSLRAW 0x6e001077 +#define MASK_KSLRAW 0xfe00707f +#define MATCH_KSLRAW_U 0x7e001077 +#define MASK_KSLRAW_U 0xfe00707f +#define MATCH_KSTAS16 0xc4002077 +#define MASK_KSTAS16 0xfe00707f +#define 
MATCH_KSTAS32 0xc0002077 +#define MASK_KSTAS32 0xfe00707f +#define MATCH_KSTSA16 0xc6002077 +#define MASK_KSTSA16 0xfe00707f +#define MATCH_KSTSA32 0xc2002077 +#define MASK_KSTSA32 0xfe00707f +#define MATCH_KSUB16 0x12000077 +#define MASK_KSUB16 0xfe00707f +#define MATCH_KSUB32 0x12002077 +#define MASK_KSUB32 0xfe00707f +#define MATCH_KSUB64 0x92001077 +#define MASK_KSUB64 0xfe00707f +#define MATCH_KSUB8 0x1a000077 +#define MASK_KSUB8 0xfe00707f +#define MATCH_KSUBH 0x6001077 +#define MASK_KSUBH 0xfe00707f +#define MATCH_KSUBW 0x2001077 +#define MASK_KSUBW 0xfe00707f +#define MATCH_KWMMUL 0x62001077 +#define MASK_KWMMUL 0xfe00707f +#define MATCH_KWMMUL_U 0x72001077 +#define MASK_KWMMUL_U 0xfe00707f +#define MATCH_LB 0x3 +#define MASK_LB 0x707f +#define MATCH_LBU 0x4003 +#define MASK_LBU 0x707f +#define MATCH_LD 0x3003 +#define MASK_LD 0x707f +#define MATCH_LH 0x1003 +#define MASK_LH 0x707f +#define MATCH_LHU 0x5003 +#define MASK_LHU 0x707f +#define MATCH_LR_D 0x1000302f +#define MASK_LR_D 0xf9f0707f +#define MATCH_LR_W 0x1000202f +#define MASK_LR_W 0xf9f0707f +#define MATCH_LUI 0x37 +#define MASK_LUI 0x7f +#define MATCH_LW 0x2003 +#define MASK_LW 0x707f +#define MATCH_LWU 0x6003 +#define MASK_LWU 0x707f +#define MATCH_MADDR32 0xc4001077 +#define MASK_MADDR32 0xfe00707f +#define MATCH_MAX 0xa006033 +#define MASK_MAX 0xfe00707f +#define MATCH_MAXU 0xa007033 +#define MASK_MAXU 0xfe00707f +#define MATCH_MIN 0xa004033 +#define MASK_MIN 0xfe00707f +#define MATCH_MINU 0xa005033 +#define MASK_MINU 0xfe00707f +#define MATCH_MNRET 0x70200073 +#define MASK_MNRET 0xffffffff +#define MATCH_MRET 0x30200073 +#define MASK_MRET 0xffffffff +#define MATCH_MSUBR32 0xc6001077 +#define MASK_MSUBR32 0xfe00707f +#define MATCH_MUL 0x2000033 +#define MASK_MUL 0xfe00707f +#define MATCH_MULH 0x2001033 +#define MASK_MULH 0xfe00707f +#define MATCH_MULHSU 0x2002033 +#define MASK_MULHSU 0xfe00707f +#define MATCH_MULHU 0x2003033 +#define MASK_MULHU 0xfe00707f +#define MATCH_MULR64 0xf0001077 
+#define MASK_MULR64 0xfe00707f +#define MATCH_MULSR64 0xe0001077 +#define MASK_MULSR64 0xfe00707f +#define MATCH_MULW 0x200003b +#define MASK_MULW 0xfe00707f +#define MATCH_OR 0x6033 +#define MASK_OR 0xfe00707f +#define MATCH_ORI 0x6013 +#define MASK_ORI 0x707f +#define MATCH_ORN 0x40006033 +#define MASK_ORN 0xfe00707f +#define MATCH_PACK 0x8004033 +#define MASK_PACK 0xfe00707f +#define MATCH_PACKH 0x8007033 +#define MASK_PACKH 0xfe00707f +#define MATCH_PACKU 0x48004033 +#define MASK_PACKU 0xfe00707f +#define MATCH_PACKUW 0x4800403b +#define MASK_PACKUW 0xfe00707f +#define MATCH_PACKW 0x800403b +#define MASK_PACKW 0xfe00707f +#define MATCH_PAUSE 0x100000f +#define MASK_PAUSE 0xffffffff +#define MATCH_PBSAD 0xfc000077 +#define MASK_PBSAD 0xfe00707f +#define MATCH_PBSADA 0xfe000077 +#define MASK_PBSADA 0xfe00707f +#define MATCH_PKBB16 0xe001077 +#define MASK_PKBB16 0xfe00707f +#define MATCH_PKBT16 0x1e001077 +#define MASK_PKBT16 0xfe00707f +#define MATCH_PKBT32 0x1e002077 +#define MASK_PKBT32 0xfe00707f +#define MATCH_PKTB16 0x3e001077 +#define MASK_PKTB16 0xfe00707f +#define MATCH_PKTB32 0x3e002077 +#define MASK_PKTB32 0xfe00707f +#define MATCH_PKTT16 0x2e001077 +#define MASK_PKTT16 0xfe00707f +#define MATCH_PREFETCH_I 0x6013 +#define MASK_PREFETCH_I 0x1f07fff +#define MATCH_PREFETCH_R 0x106013 +#define MASK_PREFETCH_R 0x1f07fff +#define MATCH_PREFETCH_W 0x306013 +#define MASK_PREFETCH_W 0x1f07fff +#define MATCH_RADD16 0x77 +#define MASK_RADD16 0xfe00707f +#define MATCH_RADD32 0x2077 +#define MASK_RADD32 0xfe00707f +#define MATCH_RADD64 0x80001077 +#define MASK_RADD64 0xfe00707f +#define MATCH_RADD8 0x8000077 +#define MASK_RADD8 0xfe00707f +#define MATCH_RADDW 0x20001077 +#define MASK_RADDW 0xfe00707f +#define MATCH_RCRAS16 0x4000077 +#define MASK_RCRAS16 0xfe00707f +#define MATCH_RCRAS32 0x4002077 +#define MASK_RCRAS32 0xfe00707f +#define MATCH_RCRSA16 0x6000077 +#define MASK_RCRSA16 0xfe00707f +#define MATCH_RCRSA32 0x6002077 +#define MASK_RCRSA32 0xfe00707f 
+#define MATCH_REM 0x2006033 +#define MASK_REM 0xfe00707f +#define MATCH_REMU 0x2007033 +#define MASK_REMU 0xfe00707f +#define MATCH_REMUW 0x200703b +#define MASK_REMUW 0xfe00707f +#define MATCH_REMW 0x200603b +#define MASK_REMW 0xfe00707f +#define MATCH_ROL 0x60001033 +#define MASK_ROL 0xfe00707f +#define MATCH_ROLW 0x6000103b +#define MASK_ROLW 0xfe00707f +#define MATCH_ROR 0x60005033 +#define MASK_ROR 0xfe00707f +#define MATCH_RORI 0x60005013 +#define MASK_RORI 0xfc00707f +#define MATCH_RORIW 0x6000501b +#define MASK_RORIW 0xfe00707f +#define MATCH_RORW 0x6000503b +#define MASK_RORW 0xfe00707f +#define MATCH_RSTAS16 0xb4002077 +#define MASK_RSTAS16 0xfe00707f +#define MATCH_RSTAS32 0xb0002077 +#define MASK_RSTAS32 0xfe00707f +#define MATCH_RSTSA16 0xb6002077 +#define MASK_RSTSA16 0xfe00707f +#define MATCH_RSTSA32 0xb2002077 +#define MASK_RSTSA32 0xfe00707f +#define MATCH_RSUB16 0x2000077 +#define MASK_RSUB16 0xfe00707f +#define MATCH_RSUB32 0x2002077 +#define MASK_RSUB32 0xfe00707f +#define MATCH_RSUB64 0x82001077 +#define MASK_RSUB64 0xfe00707f +#define MATCH_RSUB8 0xa000077 +#define MASK_RSUB8 0xfe00707f +#define MATCH_RSUBW 0x22001077 +#define MASK_RSUBW 0xfe00707f +#define MATCH_SB 0x23 +#define MASK_SB 0x707f +#define MATCH_SC_D 0x1800302f +#define MASK_SC_D 0xf800707f +#define MATCH_SC_W 0x1800202f +#define MASK_SC_W 0xf800707f +#define MATCH_SCLIP16 0x84000077 +#define MASK_SCLIP16 0xff00707f +#define MATCH_SCLIP32 0xe4000077 +#define MASK_SCLIP32 0xfe00707f +#define MATCH_SCLIP8 0x8c000077 +#define MASK_SCLIP8 0xff80707f +#define MATCH_SCMPLE16 0x1c000077 +#define MASK_SCMPLE16 0xfe00707f +#define MATCH_SCMPLE8 0x1e000077 +#define MASK_SCMPLE8 0xfe00707f +#define MATCH_SCMPLT16 0xc000077 +#define MASK_SCMPLT16 0xfe00707f +#define MATCH_SCMPLT8 0xe000077 +#define MASK_SCMPLT8 0xfe00707f +#define MATCH_SD 0x3023 +#define MASK_SD 0x707f +#define MATCH_SEXT_B 0x60401013 +#define MASK_SEXT_B 0xfff0707f +#define MATCH_SEXT_H 0x60501013 +#define MASK_SEXT_H 
0xfff0707f +#define MATCH_SFENCE_INVAL_IR 0x18100073 +#define MASK_SFENCE_INVAL_IR 0xffffffff +#define MATCH_SFENCE_VMA 0x12000073 +#define MASK_SFENCE_VMA 0xfe007fff +#define MATCH_SFENCE_W_INVAL 0x18000073 +#define MASK_SFENCE_W_INVAL 0xffffffff +#define MATCH_SH 0x1023 +#define MASK_SH 0x707f +#define MATCH_SH1ADD 0x20002033 +#define MASK_SH1ADD 0xfe00707f +#define MATCH_SH1ADD_UW 0x2000203b +#define MASK_SH1ADD_UW 0xfe00707f +#define MATCH_SH2ADD 0x20004033 +#define MASK_SH2ADD 0xfe00707f +#define MATCH_SH2ADD_UW 0x2000403b +#define MASK_SH2ADD_UW 0xfe00707f +#define MATCH_SH3ADD 0x20006033 +#define MASK_SH3ADD 0xfe00707f +#define MATCH_SH3ADD_UW 0x2000603b +#define MASK_SH3ADD_UW 0xfe00707f +#define MATCH_SHA256SIG0 0x10201013 +#define MASK_SHA256SIG0 0xfff0707f +#define MATCH_SHA256SIG1 0x10301013 +#define MASK_SHA256SIG1 0xfff0707f +#define MATCH_SHA256SUM0 0x10001013 +#define MASK_SHA256SUM0 0xfff0707f +#define MATCH_SHA256SUM1 0x10101013 +#define MASK_SHA256SUM1 0xfff0707f +#define MATCH_SHA512SIG0 0x10601013 +#define MASK_SHA512SIG0 0xfff0707f +#define MATCH_SHA512SIG0H 0x5c000033 +#define MASK_SHA512SIG0H 0xfe00707f +#define MATCH_SHA512SIG0L 0x54000033 +#define MASK_SHA512SIG0L 0xfe00707f +#define MATCH_SHA512SIG1 0x10701013 +#define MASK_SHA512SIG1 0xfff0707f +#define MATCH_SHA512SIG1H 0x5e000033 +#define MASK_SHA512SIG1H 0xfe00707f +#define MATCH_SHA512SIG1L 0x56000033 +#define MASK_SHA512SIG1L 0xfe00707f +#define MATCH_SHA512SUM0 0x10401013 +#define MASK_SHA512SUM0 0xfff0707f +#define MATCH_SHA512SUM0R 0x50000033 +#define MASK_SHA512SUM0R 0xfe00707f +#define MATCH_SHA512SUM1 0x10501013 +#define MASK_SHA512SUM1 0xfff0707f +#define MATCH_SHA512SUM1R 0x52000033 +#define MASK_SHA512SUM1R 0xfe00707f +#define MATCH_SHFL 0x8001033 +#define MASK_SHFL 0xfe00707f +#define MATCH_SHFLI 0x8001013 +#define MASK_SHFLI 0xfe00707f +#define MATCH_SHFLW 0x800103b +#define MASK_SHFLW 0xfe00707f +#define MATCH_SINVAL_VMA 0x16000073 +#define MASK_SINVAL_VMA 0xfe007fff 
+#define MATCH_SLL 0x1033 +#define MASK_SLL 0xfe00707f +#define MATCH_SLL16 0x54000077 +#define MASK_SLL16 0xfe00707f +#define MATCH_SLL32 0x54002077 +#define MASK_SLL32 0xfe00707f +#define MATCH_SLL8 0x5c000077 +#define MASK_SLL8 0xfe00707f +#define MATCH_SLLI 0x1013 +#define MASK_SLLI 0xfc00707f +#define MATCH_SLLI16 0x74000077 +#define MASK_SLLI16 0xff00707f +#define MATCH_SLLI32 0x74002077 +#define MASK_SLLI32 0xfe00707f +#define MATCH_SLLI8 0x7c000077 +#define MASK_SLLI8 0xff80707f +#define MATCH_SLLI_RV32 0x1013 +#define MASK_SLLI_RV32 0xfe00707f +#define MATCH_SLLI_UW 0x800101b +#define MASK_SLLI_UW 0xfc00707f +#define MATCH_SLLIW 0x101b +#define MASK_SLLIW 0xfe00707f +#define MATCH_SLLW 0x103b +#define MASK_SLLW 0xfe00707f +#define MATCH_SLO 0x20001033 +#define MASK_SLO 0xfe00707f +#define MATCH_SLOI 0x20001013 +#define MASK_SLOI 0xfc00707f +#define MATCH_SLOIW 0x2000101b +#define MASK_SLOIW 0xfe00707f +#define MATCH_SLOW 0x2000103b +#define MASK_SLOW 0xfe00707f +#define MATCH_SLT 0x2033 +#define MASK_SLT 0xfe00707f +#define MATCH_SLTI 0x2013 +#define MASK_SLTI 0x707f +#define MATCH_SLTIU 0x3013 +#define MASK_SLTIU 0x707f +#define MATCH_SLTU 0x3033 +#define MASK_SLTU 0xfe00707f +#define MATCH_SM3P0 0x10801013 +#define MASK_SM3P0 0xfff0707f +#define MATCH_SM3P1 0x10901013 +#define MASK_SM3P1 0xfff0707f +#define MATCH_SM4ED 0x30000033 +#define MASK_SM4ED 0x3e00707f +#define MATCH_SM4KS 0x34000033 +#define MASK_SM4KS 0x3e00707f +#define MATCH_SMAL 0x5e001077 +#define MASK_SMAL 0xfe00707f +#define MATCH_SMALBB 0x88001077 +#define MASK_SMALBB 0xfe00707f +#define MATCH_SMALBT 0x98001077 +#define MASK_SMALBT 0xfe00707f +#define MATCH_SMALDA 0x8c001077 +#define MASK_SMALDA 0xfe00707f +#define MATCH_SMALDRS 0x9a001077 +#define MASK_SMALDRS 0xfe00707f +#define MATCH_SMALDS 0x8a001077 +#define MASK_SMALDS 0xfe00707f +#define MATCH_SMALTT 0xa8001077 +#define MASK_SMALTT 0xfe00707f +#define MATCH_SMALXDA 0x9c001077 +#define MASK_SMALXDA 0xfe00707f +#define MATCH_SMALXDS 
0xaa001077 +#define MASK_SMALXDS 0xfe00707f +#define MATCH_SMAQA 0xc8000077 +#define MASK_SMAQA 0xfe00707f +#define MATCH_SMAQA_SU 0xca000077 +#define MASK_SMAQA_SU 0xfe00707f +#define MATCH_SMAR64 0x84001077 +#define MASK_SMAR64 0xfe00707f +#define MATCH_SMAX16 0x82000077 +#define MASK_SMAX16 0xfe00707f +#define MATCH_SMAX32 0x92002077 +#define MASK_SMAX32 0xfe00707f +#define MATCH_SMAX8 0x8a000077 +#define MASK_SMAX8 0xfe00707f +#define MATCH_SMBB16 0x8001077 +#define MASK_SMBB16 0xfe00707f +#define MATCH_SMBT16 0x18001077 +#define MASK_SMBT16 0xfe00707f +#define MATCH_SMBT32 0x18002077 +#define MASK_SMBT32 0xfe00707f +#define MATCH_SMDRS 0x68001077 +#define MASK_SMDRS 0xfe00707f +#define MATCH_SMDRS32 0x68002077 +#define MASK_SMDRS32 0xfe00707f +#define MATCH_SMDS 0x58001077 +#define MASK_SMDS 0xfe00707f +#define MATCH_SMDS32 0x58002077 +#define MASK_SMDS32 0xfe00707f +#define MATCH_SMIN16 0x80000077 +#define MASK_SMIN16 0xfe00707f +#define MATCH_SMIN32 0x90002077 +#define MASK_SMIN32 0xfe00707f +#define MATCH_SMIN8 0x88000077 +#define MASK_SMIN8 0xfe00707f +#define MATCH_SMMUL 0x40001077 +#define MASK_SMMUL 0xfe00707f +#define MATCH_SMMUL_U 0x50001077 +#define MASK_SMMUL_U 0xfe00707f +#define MATCH_SMMWB 0x44001077 +#define MASK_SMMWB 0xfe00707f +#define MATCH_SMMWB_U 0x54001077 +#define MASK_SMMWB_U 0xfe00707f +#define MATCH_SMMWT 0x64001077 +#define MASK_SMMWT 0xfe00707f +#define MATCH_SMMWT_U 0x74001077 +#define MASK_SMMWT_U 0xfe00707f +#define MATCH_SMSLDA 0xac001077 +#define MASK_SMSLDA 0xfe00707f +#define MATCH_SMSLXDA 0xbc001077 +#define MASK_SMSLXDA 0xfe00707f +#define MATCH_SMSR64 0x86001077 +#define MASK_SMSR64 0xfe00707f +#define MATCH_SMTT16 0x28001077 +#define MASK_SMTT16 0xfe00707f +#define MATCH_SMTT32 0x28002077 +#define MASK_SMTT32 0xfe00707f +#define MATCH_SMUL16 0xa0000077 +#define MASK_SMUL16 0xfe00707f +#define MATCH_SMUL8 0xa8000077 +#define MASK_SMUL8 0xfe00707f +#define MATCH_SMULX16 0xa2000077 +#define MASK_SMULX16 0xfe00707f +#define 
MATCH_SMULX8 0xaa000077 +#define MASK_SMULX8 0xfe00707f +#define MATCH_SMXDS 0x78001077 +#define MASK_SMXDS 0xfe00707f +#define MATCH_SMXDS32 0x78002077 +#define MASK_SMXDS32 0xfe00707f +#define MATCH_SRA 0x40005033 +#define MASK_SRA 0xfe00707f +#define MATCH_SRA16 0x50000077 +#define MASK_SRA16 0xfe00707f +#define MATCH_SRA16_U 0x60000077 +#define MASK_SRA16_U 0xfe00707f +#define MATCH_SRA32 0x50002077 +#define MASK_SRA32 0xfe00707f +#define MATCH_SRA32_U 0x60002077 +#define MASK_SRA32_U 0xfe00707f +#define MATCH_SRA8 0x58000077 +#define MASK_SRA8 0xfe00707f +#define MATCH_SRA8_U 0x68000077 +#define MASK_SRA8_U 0xfe00707f +#define MATCH_SRA_U 0x24001077 +#define MASK_SRA_U 0xfe00707f +#define MATCH_SRAI 0x40005013 +#define MASK_SRAI 0xfc00707f +#define MATCH_SRAI16 0x70000077 +#define MASK_SRAI16 0xff00707f +#define MATCH_SRAI16_U 0x71000077 +#define MASK_SRAI16_U 0xff00707f +#define MATCH_SRAI32 0x70002077 +#define MASK_SRAI32 0xfe00707f +#define MATCH_SRAI32_U 0x80002077 +#define MASK_SRAI32_U 0xfe00707f +#define MATCH_SRAI8 0x78000077 +#define MASK_SRAI8 0xff80707f +#define MATCH_SRAI8_U 0x78800077 +#define MASK_SRAI8_U 0xff80707f +#define MATCH_SRAI_RV32 0x40005013 +#define MASK_SRAI_RV32 0xfe00707f +#define MATCH_SRAI_U 0xd4001077 +#define MASK_SRAI_U 0xfc00707f +#define MATCH_SRAIW 0x4000501b +#define MASK_SRAIW 0xfe00707f +#define MATCH_SRAIW_U 0x34001077 +#define MASK_SRAIW_U 0xfe00707f +#define MATCH_SRAW 0x4000503b +#define MASK_SRAW 0xfe00707f +#define MATCH_SRET 0x10200073 +#define MASK_SRET 0xffffffff +#define MATCH_SRL 0x5033 +#define MASK_SRL 0xfe00707f +#define MATCH_SRL16 0x52000077 +#define MASK_SRL16 0xfe00707f +#define MATCH_SRL16_U 0x62000077 +#define MASK_SRL16_U 0xfe00707f +#define MATCH_SRL32 0x52002077 +#define MASK_SRL32 0xfe00707f +#define MATCH_SRL32_U 0x62002077 +#define MASK_SRL32_U 0xfe00707f +#define MATCH_SRL8 0x5a000077 +#define MASK_SRL8 0xfe00707f +#define MATCH_SRL8_U 0x6a000077 +#define MASK_SRL8_U 0xfe00707f +#define 
MATCH_SRLI 0x5013 +#define MASK_SRLI 0xfc00707f +#define MATCH_SRLI16 0x72000077 +#define MASK_SRLI16 0xff00707f +#define MATCH_SRLI16_U 0x73000077 +#define MASK_SRLI16_U 0xff00707f +#define MATCH_SRLI32 0x72002077 +#define MASK_SRLI32 0xfe00707f +#define MATCH_SRLI32_U 0x82002077 +#define MASK_SRLI32_U 0xfe00707f +#define MATCH_SRLI8 0x7a000077 +#define MASK_SRLI8 0xff80707f +#define MATCH_SRLI8_U 0x7a800077 +#define MASK_SRLI8_U 0xff80707f +#define MATCH_SRLI_RV32 0x5013 +#define MASK_SRLI_RV32 0xfe00707f +#define MATCH_SRLIW 0x501b +#define MASK_SRLIW 0xfe00707f +#define MATCH_SRLW 0x503b +#define MASK_SRLW 0xfe00707f +#define MATCH_SRO 0x20005033 +#define MASK_SRO 0xfe00707f +#define MATCH_SROI 0x20005013 +#define MASK_SROI 0xfc00707f +#define MATCH_SROIW 0x2000501b +#define MASK_SROIW 0xfe00707f +#define MATCH_SROW 0x2000503b +#define MASK_SROW 0xfe00707f +#define MATCH_STAS16 0xf4002077 +#define MASK_STAS16 0xfe00707f +#define MATCH_STAS32 0xf0002077 +#define MASK_STAS32 0xfe00707f +#define MATCH_STSA16 0xf6002077 +#define MASK_STSA16 0xfe00707f +#define MATCH_STSA32 0xf2002077 +#define MASK_STSA32 0xfe00707f +#define MATCH_SUB 0x40000033 +#define MASK_SUB 0xfe00707f +#define MATCH_SUB16 0x42000077 +#define MASK_SUB16 0xfe00707f +#define MATCH_SUB32 0x42002077 +#define MASK_SUB32 0xfe00707f +#define MATCH_SUB64 0xc2001077 +#define MASK_SUB64 0xfe00707f +#define MATCH_SUB8 0x4a000077 +#define MASK_SUB8 0xfe00707f +#define MATCH_SUBW 0x4000003b +#define MASK_SUBW 0xfe00707f +#define MATCH_SUNPKD810 0xac800077 +#define MASK_SUNPKD810 0xfff0707f +#define MATCH_SUNPKD820 0xac900077 +#define MASK_SUNPKD820 0xfff0707f +#define MATCH_SUNPKD830 0xaca00077 +#define MASK_SUNPKD830 0xfff0707f +#define MATCH_SUNPKD831 0xacb00077 +#define MASK_SUNPKD831 0xfff0707f +#define MATCH_SUNPKD832 0xad300077 +#define MASK_SUNPKD832 0xfff0707f +#define MATCH_SW 0x2023 +#define MASK_SW 0x707f +#define MATCH_UCLIP16 0x85000077 +#define MASK_UCLIP16 0xff00707f +#define MATCH_UCLIP32 
0xf4000077 +#define MASK_UCLIP32 0xfe00707f +#define MATCH_UCLIP8 0x8d000077 +#define MASK_UCLIP8 0xff80707f +#define MATCH_UCMPLE16 0x3c000077 +#define MASK_UCMPLE16 0xfe00707f +#define MATCH_UCMPLE8 0x3e000077 +#define MASK_UCMPLE8 0xfe00707f +#define MATCH_UCMPLT16 0x2c000077 +#define MASK_UCMPLT16 0xfe00707f +#define MATCH_UCMPLT8 0x2e000077 +#define MASK_UCMPLT8 0xfe00707f +#define MATCH_UKADD16 0x30000077 +#define MASK_UKADD16 0xfe00707f +#define MATCH_UKADD32 0x30002077 +#define MASK_UKADD32 0xfe00707f +#define MATCH_UKADD64 0xb0001077 +#define MASK_UKADD64 0xfe00707f +#define MATCH_UKADD8 0x38000077 +#define MASK_UKADD8 0xfe00707f +#define MATCH_UKADDH 0x14001077 +#define MASK_UKADDH 0xfe00707f +#define MATCH_UKADDW 0x10001077 +#define MASK_UKADDW 0xfe00707f +#define MATCH_UKCRAS16 0x34000077 +#define MASK_UKCRAS16 0xfe00707f +#define MATCH_UKCRAS32 0x34002077 +#define MASK_UKCRAS32 0xfe00707f +#define MATCH_UKCRSA16 0x36000077 +#define MASK_UKCRSA16 0xfe00707f +#define MATCH_UKCRSA32 0x36002077 +#define MASK_UKCRSA32 0xfe00707f +#define MATCH_UKMAR64 0xb4001077 +#define MASK_UKMAR64 0xfe00707f +#define MATCH_UKMSR64 0xb6001077 +#define MASK_UKMSR64 0xfe00707f +#define MATCH_UKSTAS16 0xe4002077 +#define MASK_UKSTAS16 0xfe00707f +#define MATCH_UKSTAS32 0xe0002077 +#define MASK_UKSTAS32 0xfe00707f +#define MATCH_UKSTSA16 0xe6002077 +#define MASK_UKSTSA16 0xfe00707f +#define MATCH_UKSTSA32 0xe2002077 +#define MASK_UKSTSA32 0xfe00707f +#define MATCH_UKSUB16 0x32000077 +#define MASK_UKSUB16 0xfe00707f +#define MATCH_UKSUB32 0x32002077 +#define MASK_UKSUB32 0xfe00707f +#define MATCH_UKSUB64 0xb2001077 +#define MASK_UKSUB64 0xfe00707f +#define MATCH_UKSUB8 0x3a000077 +#define MASK_UKSUB8 0xfe00707f +#define MATCH_UKSUBH 0x16001077 +#define MASK_UKSUBH 0xfe00707f +#define MATCH_UKSUBW 0x12001077 +#define MASK_UKSUBW 0xfe00707f +#define MATCH_UMAQA 0xcc000077 +#define MASK_UMAQA 0xfe00707f +#define MATCH_UMAR64 0xa4001077 +#define MASK_UMAR64 0xfe00707f +#define 
MATCH_UMAX16 0x92000077 +#define MASK_UMAX16 0xfe00707f +#define MATCH_UMAX32 0xa2002077 +#define MASK_UMAX32 0xfe00707f +#define MATCH_UMAX8 0x9a000077 +#define MASK_UMAX8 0xfe00707f +#define MATCH_UMIN16 0x90000077 +#define MASK_UMIN16 0xfe00707f +#define MATCH_UMIN32 0xa0002077 +#define MASK_UMIN32 0xfe00707f +#define MATCH_UMIN8 0x98000077 +#define MASK_UMIN8 0xfe00707f +#define MATCH_UMSR64 0xa6001077 +#define MASK_UMSR64 0xfe00707f +#define MATCH_UMUL16 0xb0000077 +#define MASK_UMUL16 0xfe00707f +#define MATCH_UMUL8 0xb8000077 +#define MASK_UMUL8 0xfe00707f +#define MATCH_UMULX16 0xb2000077 +#define MASK_UMULX16 0xfe00707f +#define MATCH_UMULX8 0xba000077 +#define MASK_UMULX8 0xfe00707f +#define MATCH_UNSHFL 0x8005033 +#define MASK_UNSHFL 0xfe00707f +#define MATCH_UNSHFLI 0x8005013 +#define MASK_UNSHFLI 0xfe00707f +#define MATCH_UNSHFLW 0x800503b +#define MASK_UNSHFLW 0xfe00707f +#define MATCH_URADD16 0x20000077 +#define MASK_URADD16 0xfe00707f +#define MATCH_URADD32 0x20002077 +#define MASK_URADD32 0xfe00707f +#define MATCH_URADD64 0xa0001077 +#define MASK_URADD64 0xfe00707f +#define MATCH_URADD8 0x28000077 +#define MASK_URADD8 0xfe00707f +#define MATCH_URADDW 0x30001077 +#define MASK_URADDW 0xfe00707f +#define MATCH_URCRAS16 0x24000077 +#define MASK_URCRAS16 0xfe00707f +#define MATCH_URCRAS32 0x24002077 +#define MASK_URCRAS32 0xfe00707f +#define MATCH_URCRSA16 0x26000077 +#define MASK_URCRSA16 0xfe00707f +#define MATCH_URCRSA32 0x26002077 +#define MASK_URCRSA32 0xfe00707f +#define MATCH_URSTAS16 0xd4002077 +#define MASK_URSTAS16 0xfe00707f +#define MATCH_URSTAS32 0xd0002077 +#define MASK_URSTAS32 0xfe00707f +#define MATCH_URSTSA16 0xd6002077 +#define MASK_URSTSA16 0xfe00707f +#define MATCH_URSTSA32 0xd2002077 +#define MASK_URSTSA32 0xfe00707f +#define MATCH_URSUB16 0x22000077 +#define MASK_URSUB16 0xfe00707f +#define MATCH_URSUB32 0x22002077 +#define MASK_URSUB32 0xfe00707f +#define MATCH_URSUB64 0xa2001077 +#define MASK_URSUB64 0xfe00707f +#define 
MATCH_URSUB8 0x2a000077 +#define MASK_URSUB8 0xfe00707f +#define MATCH_URSUBW 0x32001077 +#define MASK_URSUBW 0xfe00707f +#define MATCH_VAADD_VV 0x24002057 +#define MASK_VAADD_VV 0xfc00707f +#define MATCH_VAADD_VX 0x24006057 +#define MASK_VAADD_VX 0xfc00707f +#define MATCH_VAADDU_VV 0x20002057 +#define MASK_VAADDU_VV 0xfc00707f +#define MATCH_VAADDU_VX 0x20006057 +#define MASK_VAADDU_VX 0xfc00707f +#define MATCH_VADC_VIM 0x40003057 +#define MASK_VADC_VIM 0xfe00707f +#define MATCH_VADC_VVM 0x40000057 +#define MASK_VADC_VVM 0xfe00707f +#define MATCH_VADC_VXM 0x40004057 +#define MASK_VADC_VXM 0xfe00707f +#define MATCH_VADD_VI 0x3057 +#define MASK_VADD_VI 0xfc00707f +#define MATCH_VADD_VV 0x57 +#define MASK_VADD_VV 0xfc00707f +#define MATCH_VADD_VX 0x4057 +#define MASK_VADD_VX 0xfc00707f +#define MATCH_VAMOADDEI16_V 0x502f +#define MASK_VAMOADDEI16_V 0xf800707f +#define MATCH_VAMOADDEI32_V 0x602f +#define MASK_VAMOADDEI32_V 0xf800707f +#define MATCH_VAMOADDEI64_V 0x702f +#define MASK_VAMOADDEI64_V 0xf800707f +#define MATCH_VAMOADDEI8_V 0x2f +#define MASK_VAMOADDEI8_V 0xf800707f +#define MATCH_VAMOANDEI16_V 0x6000502f +#define MASK_VAMOANDEI16_V 0xf800707f +#define MATCH_VAMOANDEI32_V 0x6000602f +#define MASK_VAMOANDEI32_V 0xf800707f +#define MATCH_VAMOANDEI64_V 0x6000702f +#define MASK_VAMOANDEI64_V 0xf800707f +#define MATCH_VAMOANDEI8_V 0x6000002f +#define MASK_VAMOANDEI8_V 0xf800707f +#define MATCH_VAMOMAXEI16_V 0xa000502f +#define MASK_VAMOMAXEI16_V 0xf800707f +#define MATCH_VAMOMAXEI32_V 0xa000602f +#define MASK_VAMOMAXEI32_V 0xf800707f +#define MATCH_VAMOMAXEI64_V 0xa000702f +#define MASK_VAMOMAXEI64_V 0xf800707f +#define MATCH_VAMOMAXEI8_V 0xa000002f +#define MASK_VAMOMAXEI8_V 0xf800707f +#define MATCH_VAMOMAXUEI16_V 0xe000502f +#define MASK_VAMOMAXUEI16_V 0xf800707f +#define MATCH_VAMOMAXUEI32_V 0xe000602f +#define MASK_VAMOMAXUEI32_V 0xf800707f +#define MATCH_VAMOMAXUEI64_V 0xe000702f +#define MASK_VAMOMAXUEI64_V 0xf800707f +#define MATCH_VAMOMAXUEI8_V 
0xe000002f +#define MASK_VAMOMAXUEI8_V 0xf800707f +#define MATCH_VAMOMINEI16_V 0x8000502f +#define MASK_VAMOMINEI16_V 0xf800707f +#define MATCH_VAMOMINEI32_V 0x8000602f +#define MASK_VAMOMINEI32_V 0xf800707f +#define MATCH_VAMOMINEI64_V 0x8000702f +#define MASK_VAMOMINEI64_V 0xf800707f +#define MATCH_VAMOMINEI8_V 0x8000002f +#define MASK_VAMOMINEI8_V 0xf800707f +#define MATCH_VAMOMINUEI16_V 0xc000502f +#define MASK_VAMOMINUEI16_V 0xf800707f +#define MATCH_VAMOMINUEI32_V 0xc000602f +#define MASK_VAMOMINUEI32_V 0xf800707f +#define MATCH_VAMOMINUEI64_V 0xc000702f +#define MASK_VAMOMINUEI64_V 0xf800707f +#define MATCH_VAMOMINUEI8_V 0xc000002f +#define MASK_VAMOMINUEI8_V 0xf800707f +#define MATCH_VAMOOREI16_V 0x4000502f +#define MASK_VAMOOREI16_V 0xf800707f +#define MATCH_VAMOOREI32_V 0x4000602f +#define MASK_VAMOOREI32_V 0xf800707f +#define MATCH_VAMOOREI64_V 0x4000702f +#define MASK_VAMOOREI64_V 0xf800707f +#define MATCH_VAMOOREI8_V 0x4000002f +#define MASK_VAMOOREI8_V 0xf800707f +#define MATCH_VAMOSWAPEI16_V 0x800502f +#define MASK_VAMOSWAPEI16_V 0xf800707f +#define MATCH_VAMOSWAPEI32_V 0x800602f +#define MASK_VAMOSWAPEI32_V 0xf800707f +#define MATCH_VAMOSWAPEI64_V 0x800702f +#define MASK_VAMOSWAPEI64_V 0xf800707f +#define MATCH_VAMOSWAPEI8_V 0x800002f +#define MASK_VAMOSWAPEI8_V 0xf800707f +#define MATCH_VAMOXOREI16_V 0x2000502f +#define MASK_VAMOXOREI16_V 0xf800707f +#define MATCH_VAMOXOREI32_V 0x2000602f +#define MASK_VAMOXOREI32_V 0xf800707f +#define MATCH_VAMOXOREI64_V 0x2000702f +#define MASK_VAMOXOREI64_V 0xf800707f +#define MATCH_VAMOXOREI8_V 0x2000002f +#define MASK_VAMOXOREI8_V 0xf800707f +#define MATCH_VAND_VI 0x24003057 +#define MASK_VAND_VI 0xfc00707f +#define MATCH_VAND_VV 0x24000057 +#define MASK_VAND_VV 0xfc00707f +#define MATCH_VAND_VX 0x24004057 +#define MASK_VAND_VX 0xfc00707f +#define MATCH_VASUB_VV 0x2c002057 +#define MASK_VASUB_VV 0xfc00707f +#define MATCH_VASUB_VX 0x2c006057 +#define MASK_VASUB_VX 0xfc00707f +#define MATCH_VASUBU_VV 0x28002057 
+#define MASK_VASUBU_VV 0xfc00707f +#define MATCH_VASUBU_VX 0x28006057 +#define MASK_VASUBU_VX 0xfc00707f +#define MATCH_VCOMPRESS_VM 0x5e002057 +#define MASK_VCOMPRESS_VM 0xfe00707f +#define MATCH_VCPOP_M 0x40082057 +#define MASK_VCPOP_M 0xfc0ff07f +#define MATCH_VDIV_VV 0x84002057 +#define MASK_VDIV_VV 0xfc00707f +#define MATCH_VDIV_VX 0x84006057 +#define MASK_VDIV_VX 0xfc00707f +#define MATCH_VDIVU_VV 0x80002057 +#define MASK_VDIVU_VV 0xfc00707f +#define MATCH_VDIVU_VX 0x80006057 +#define MASK_VDIVU_VX 0xfc00707f +#define MATCH_VFADD_VF 0x5057 +#define MASK_VFADD_VF 0xfc00707f +#define MATCH_VFADD_VV 0x1057 +#define MASK_VFADD_VV 0xfc00707f +#define MATCH_VFCLASS_V 0x4c081057 +#define MASK_VFCLASS_V 0xfc0ff07f +#define MATCH_VFCVT_F_X_V 0x48019057 +#define MASK_VFCVT_F_X_V 0xfc0ff07f +#define MATCH_VFCVT_F_XU_V 0x48011057 +#define MASK_VFCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFCVT_RTZ_X_F_V 0x48039057 +#define MASK_VFCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFCVT_RTZ_XU_F_V 0x48031057 +#define MASK_VFCVT_RTZ_XU_F_V 0xfc0ff07f +#define MATCH_VFCVT_X_F_V 0x48009057 +#define MASK_VFCVT_X_F_V 0xfc0ff07f +#define MATCH_VFCVT_XU_F_V 0x48001057 +#define MASK_VFCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFDIV_VF 0x80005057 +#define MASK_VFDIV_VF 0xfc00707f +#define MATCH_VFDIV_VV 0x80001057 +#define MASK_VFDIV_VV 0xfc00707f +#define MATCH_VFIRST_M 0x4008a057 +#define MASK_VFIRST_M 0xfc0ff07f +#define MATCH_VFMACC_VF 0xb0005057 +#define MASK_VFMACC_VF 0xfc00707f +#define MATCH_VFMACC_VV 0xb0001057 +#define MASK_VFMACC_VV 0xfc00707f +#define MATCH_VFMADD_VF 0xa0005057 +#define MASK_VFMADD_VF 0xfc00707f +#define MATCH_VFMADD_VV 0xa0001057 +#define MASK_VFMADD_VV 0xfc00707f +#define MATCH_VFMAX_VF 0x18005057 +#define MASK_VFMAX_VF 0xfc00707f +#define MATCH_VFMAX_VV 0x18001057 +#define MASK_VFMAX_VV 0xfc00707f +#define MATCH_VFMERGE_VFM 0x5c005057 +#define MASK_VFMERGE_VFM 0xfe00707f +#define MATCH_VFMIN_VF 0x10005057 +#define MASK_VFMIN_VF 0xfc00707f +#define MATCH_VFMIN_VV 
0x10001057 +#define MASK_VFMIN_VV 0xfc00707f +#define MATCH_VFMSAC_VF 0xb8005057 +#define MASK_VFMSAC_VF 0xfc00707f +#define MATCH_VFMSAC_VV 0xb8001057 +#define MASK_VFMSAC_VV 0xfc00707f +#define MATCH_VFMSUB_VF 0xa8005057 +#define MASK_VFMSUB_VF 0xfc00707f +#define MATCH_VFMSUB_VV 0xa8001057 +#define MASK_VFMSUB_VV 0xfc00707f +#define MATCH_VFMUL_VF 0x90005057 +#define MASK_VFMUL_VF 0xfc00707f +#define MATCH_VFMUL_VV 0x90001057 +#define MASK_VFMUL_VV 0xfc00707f +#define MATCH_VFMV_F_S 0x42001057 +#define MASK_VFMV_F_S 0xfe0ff07f +#define MATCH_VFMV_S_F 0x42005057 +#define MASK_VFMV_S_F 0xfff0707f +#define MATCH_VFMV_V_F 0x5e005057 +#define MASK_VFMV_V_F 0xfff0707f +#define MATCH_VFNCVT_F_F_W 0x480a1057 +#define MASK_VFNCVT_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_F_X_W 0x48099057 +#define MASK_VFNCVT_F_X_W 0xfc0ff07f +#define MATCH_VFNCVT_F_XU_W 0x48091057 +#define MASK_VFNCVT_F_XU_W 0xfc0ff07f +#define MATCH_VFNCVT_ROD_F_F_W 0x480a9057 +#define MASK_VFNCVT_ROD_F_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_X_F_W 0x480b9057 +#define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f +#define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057 +#define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVT_X_F_W 0x48089057 +#define MASK_VFNCVT_X_F_W 0xfc0ff07f +#define MATCH_VFNCVT_XU_F_W 0x48081057 +#define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNMACC_VF 0xb4005057 +#define MASK_VFNMACC_VF 0xfc00707f +#define MATCH_VFNMACC_VV 0xb4001057 +#define MASK_VFNMACC_VV 0xfc00707f +#define MATCH_VFNMADD_VF 0xa4005057 +#define MASK_VFNMADD_VF 0xfc00707f +#define MATCH_VFNMADD_VV 0xa4001057 +#define MASK_VFNMADD_VV 0xfc00707f +#define MATCH_VFNMSAC_VF 0xbc005057 +#define MASK_VFNMSAC_VF 0xfc00707f +#define MATCH_VFNMSAC_VV 0xbc001057 +#define MASK_VFNMSAC_VV 0xfc00707f +#define MATCH_VFNMSUB_VF 0xac005057 +#define MASK_VFNMSUB_VF 0xfc00707f +#define MATCH_VFNMSUB_VV 0xac001057 +#define MASK_VFNMSUB_VV 0xfc00707f +#define MATCH_VFRDIV_VF 0x84005057 +#define MASK_VFRDIV_VF 0xfc00707f +#define MATCH_VFREC7_V 
0x4c029057 +#define MASK_VFREC7_V 0xfc0ff07f +#define MATCH_VFREDMAX_VS 0x1c001057 +#define MASK_VFREDMAX_VS 0xfc00707f +#define MATCH_VFREDMIN_VS 0x14001057 +#define MASK_VFREDMIN_VS 0xfc00707f +#define MATCH_VFREDOSUM_VS 0xc001057 +#define MASK_VFREDOSUM_VS 0xfc00707f +#define MATCH_VFREDUSUM_VS 0x4001057 +#define MASK_VFREDUSUM_VS 0xfc00707f +#define MATCH_VFRSQRT7_V 0x4c021057 +#define MASK_VFRSQRT7_V 0xfc0ff07f +#define MATCH_VFRSUB_VF 0x9c005057 +#define MASK_VFRSUB_VF 0xfc00707f +#define MATCH_VFSGNJ_VF 0x20005057 +#define MASK_VFSGNJ_VF 0xfc00707f +#define MATCH_VFSGNJ_VV 0x20001057 +#define MASK_VFSGNJ_VV 0xfc00707f +#define MATCH_VFSGNJN_VF 0x24005057 +#define MASK_VFSGNJN_VF 0xfc00707f +#define MATCH_VFSGNJN_VV 0x24001057 +#define MASK_VFSGNJN_VV 0xfc00707f +#define MATCH_VFSGNJX_VF 0x28005057 +#define MASK_VFSGNJX_VF 0xfc00707f +#define MATCH_VFSGNJX_VV 0x28001057 +#define MASK_VFSGNJX_VV 0xfc00707f +#define MATCH_VFSLIDE1DOWN_VF 0x3c005057 +#define MASK_VFSLIDE1DOWN_VF 0xfc00707f +#define MATCH_VFSLIDE1UP_VF 0x38005057 +#define MASK_VFSLIDE1UP_VF 0xfc00707f +#define MATCH_VFSQRT_V 0x4c001057 +#define MASK_VFSQRT_V 0xfc0ff07f +#define MATCH_VFSUB_VF 0x8005057 +#define MASK_VFSUB_VF 0xfc00707f +#define MATCH_VFSUB_VV 0x8001057 +#define MASK_VFSUB_VV 0xfc00707f +#define MATCH_VFWADD_VF 0xc0005057 +#define MASK_VFWADD_VF 0xfc00707f +#define MATCH_VFWADD_VV 0xc0001057 +#define MASK_VFWADD_VV 0xfc00707f +#define MATCH_VFWADD_WF 0xd0005057 +#define MASK_VFWADD_WF 0xfc00707f +#define MATCH_VFWADD_WV 0xd0001057 +#define MASK_VFWADD_WV 0xfc00707f +#define MATCH_VFWCVT_F_F_V 0x48061057 +#define MASK_VFWCVT_F_F_V 0xfc0ff07f +#define MATCH_VFWCVT_F_X_V 0x48059057 +#define MASK_VFWCVT_F_X_V 0xfc0ff07f +#define MATCH_VFWCVT_F_XU_V 0x48051057 +#define MASK_VFWCVT_F_XU_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_X_F_V 0x48079057 +#define MASK_VFWCVT_RTZ_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_RTZ_XU_F_V 0x48071057 +#define MASK_VFWCVT_RTZ_XU_F_V 0xfc0ff07f +#define 
MATCH_VFWCVT_X_F_V 0x48049057 +#define MASK_VFWCVT_X_F_V 0xfc0ff07f +#define MATCH_VFWCVT_XU_F_V 0x48041057 +#define MASK_VFWCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFWMACC_VF 0xf0005057 +#define MASK_VFWMACC_VF 0xfc00707f +#define MATCH_VFWMACC_VV 0xf0001057 +#define MASK_VFWMACC_VV 0xfc00707f +#define MATCH_VFWMSAC_VF 0xf8005057 +#define MASK_VFWMSAC_VF 0xfc00707f +#define MATCH_VFWMSAC_VV 0xf8001057 +#define MASK_VFWMSAC_VV 0xfc00707f +#define MATCH_VFWMUL_VF 0xe0005057 +#define MASK_VFWMUL_VF 0xfc00707f +#define MATCH_VFWMUL_VV 0xe0001057 +#define MASK_VFWMUL_VV 0xfc00707f +#define MATCH_VFWNMACC_VF 0xf4005057 +#define MASK_VFWNMACC_VF 0xfc00707f +#define MATCH_VFWNMACC_VV 0xf4001057 +#define MASK_VFWNMACC_VV 0xfc00707f +#define MATCH_VFWNMSAC_VF 0xfc005057 +#define MASK_VFWNMSAC_VF 0xfc00707f +#define MATCH_VFWNMSAC_VV 0xfc001057 +#define MASK_VFWNMSAC_VV 0xfc00707f +#define MATCH_VFWREDOSUM_VS 0xcc001057 +#define MASK_VFWREDOSUM_VS 0xfc00707f +#define MATCH_VFWREDUSUM_VS 0xc4001057 +#define MASK_VFWREDUSUM_VS 0xfc00707f +#define MATCH_VFWSUB_VF 0xc8005057 +#define MASK_VFWSUB_VF 0xfc00707f +#define MATCH_VFWSUB_VV 0xc8001057 +#define MASK_VFWSUB_VV 0xfc00707f +#define MATCH_VFWSUB_WF 0xd8005057 +#define MASK_VFWSUB_WF 0xfc00707f +#define MATCH_VFWSUB_WV 0xd8001057 +#define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VID_V 0x5008a057 +#define MASK_VID_V 0xfdfff07f +#define MATCH_VIOTA_M 0x50082057 +#define MASK_VIOTA_M 0xfc0ff07f +#define MATCH_VL1RE16_V 0x2805007 +#define MASK_VL1RE16_V 0xfff0707f +#define MATCH_VL1RE32_V 0x2806007 +#define MASK_VL1RE32_V 0xfff0707f +#define MATCH_VL1RE64_V 0x2807007 +#define MASK_VL1RE64_V 0xfff0707f +#define MATCH_VL1RE8_V 0x2800007 +#define MASK_VL1RE8_V 0xfff0707f +#define MATCH_VL2RE16_V 0x22805007 +#define MASK_VL2RE16_V 0xfff0707f +#define MATCH_VL2RE32_V 0x22806007 +#define MASK_VL2RE32_V 0xfff0707f +#define MATCH_VL2RE64_V 0x22807007 +#define MASK_VL2RE64_V 0xfff0707f +#define MATCH_VL2RE8_V 0x22800007 +#define 
MASK_VL2RE8_V 0xfff0707f +#define MATCH_VL4RE16_V 0x62805007 +#define MASK_VL4RE16_V 0xfff0707f +#define MATCH_VL4RE32_V 0x62806007 +#define MASK_VL4RE32_V 0xfff0707f +#define MATCH_VL4RE64_V 0x62807007 +#define MASK_VL4RE64_V 0xfff0707f +#define MATCH_VL4RE8_V 0x62800007 +#define MASK_VL4RE8_V 0xfff0707f +#define MATCH_VL8RE16_V 0xe2805007 +#define MASK_VL8RE16_V 0xfff0707f +#define MATCH_VL8RE32_V 0xe2806007 +#define MASK_VL8RE32_V 0xfff0707f +#define MATCH_VL8RE64_V 0xe2807007 +#define MASK_VL8RE64_V 0xfff0707f +#define MATCH_VL8RE8_V 0xe2800007 +#define MASK_VL8RE8_V 0xfff0707f +#define MATCH_VLE1024_V 0x10007007 +#define MASK_VLE1024_V 0x1df0707f +#define MATCH_VLE1024FF_V 0x11007007 +#define MASK_VLE1024FF_V 0x1df0707f +#define MATCH_VLE128_V 0x10000007 +#define MASK_VLE128_V 0x1df0707f +#define MATCH_VLE128FF_V 0x11000007 +#define MASK_VLE128FF_V 0x1df0707f +#define MATCH_VLE16_V 0x5007 +#define MASK_VLE16_V 0x1df0707f +#define MATCH_VLE16FF_V 0x1005007 +#define MASK_VLE16FF_V 0x1df0707f +#define MATCH_VLE256_V 0x10005007 +#define MASK_VLE256_V 0x1df0707f +#define MATCH_VLE256FF_V 0x11005007 +#define MASK_VLE256FF_V 0x1df0707f +#define MATCH_VLE32_V 0x6007 +#define MASK_VLE32_V 0x1df0707f +#define MATCH_VLE32FF_V 0x1006007 +#define MASK_VLE32FF_V 0x1df0707f +#define MATCH_VLE512_V 0x10006007 +#define MASK_VLE512_V 0x1df0707f +#define MATCH_VLE512FF_V 0x11006007 +#define MASK_VLE512FF_V 0x1df0707f +#define MATCH_VLE64_V 0x7007 +#define MASK_VLE64_V 0x1df0707f +#define MATCH_VLE64FF_V 0x1007007 +#define MASK_VLE64FF_V 0x1df0707f +#define MATCH_VLE8_V 0x7 +#define MASK_VLE8_V 0x1df0707f +#define MATCH_VLE8FF_V 0x1000007 +#define MASK_VLE8FF_V 0x1df0707f +#define MATCH_VLM_V 0x2b00007 +#define MASK_VLM_V 0xfff0707f +#define MATCH_VLOXEI1024_V 0x1c007007 +#define MASK_VLOXEI1024_V 0x1c00707f +#define MATCH_VLOXEI128_V 0x1c000007 +#define MASK_VLOXEI128_V 0x1c00707f +#define MATCH_VLOXEI16_V 0xc005007 +#define MASK_VLOXEI16_V 0x1c00707f +#define MATCH_VLOXEI256_V 
0x1c005007 +#define MASK_VLOXEI256_V 0x1c00707f +#define MATCH_VLOXEI32_V 0xc006007 +#define MASK_VLOXEI32_V 0x1c00707f +#define MATCH_VLOXEI512_V 0x1c006007 +#define MASK_VLOXEI512_V 0x1c00707f +#define MATCH_VLOXEI64_V 0xc007007 +#define MASK_VLOXEI64_V 0x1c00707f +#define MATCH_VLOXEI8_V 0xc000007 +#define MASK_VLOXEI8_V 0x1c00707f +#define MATCH_VLSE1024_V 0x18007007 +#define MASK_VLSE1024_V 0x1c00707f +#define MATCH_VLSE128_V 0x18000007 +#define MASK_VLSE128_V 0x1c00707f +#define MATCH_VLSE16_V 0x8005007 +#define MASK_VLSE16_V 0x1c00707f +#define MATCH_VLSE256_V 0x18005007 +#define MASK_VLSE256_V 0x1c00707f +#define MATCH_VLSE32_V 0x8006007 +#define MASK_VLSE32_V 0x1c00707f +#define MATCH_VLSE512_V 0x18006007 +#define MASK_VLSE512_V 0x1c00707f +#define MATCH_VLSE64_V 0x8007007 +#define MASK_VLSE64_V 0x1c00707f +#define MATCH_VLSE8_V 0x8000007 +#define MASK_VLSE8_V 0x1c00707f +#define MATCH_VLUXEI1024_V 0x14007007 +#define MASK_VLUXEI1024_V 0x1c00707f +#define MATCH_VLUXEI128_V 0x14000007 +#define MASK_VLUXEI128_V 0x1c00707f +#define MATCH_VLUXEI16_V 0x4005007 +#define MASK_VLUXEI16_V 0x1c00707f +#define MATCH_VLUXEI256_V 0x14005007 +#define MASK_VLUXEI256_V 0x1c00707f +#define MATCH_VLUXEI32_V 0x4006007 +#define MASK_VLUXEI32_V 0x1c00707f +#define MATCH_VLUXEI512_V 0x14006007 +#define MASK_VLUXEI512_V 0x1c00707f +#define MATCH_VLUXEI64_V 0x4007007 +#define MASK_VLUXEI64_V 0x1c00707f +#define MATCH_VLUXEI8_V 0x4000007 +#define MASK_VLUXEI8_V 0x1c00707f +#define MATCH_VMACC_VV 0xb4002057 +#define MASK_VMACC_VV 0xfc00707f +#define MATCH_VMACC_VX 0xb4006057 +#define MASK_VMACC_VX 0xfc00707f +#define MATCH_VMADC_VI 0x46003057 +#define MASK_VMADC_VI 0xfe00707f +#define MATCH_VMADC_VIM 0x44003057 +#define MASK_VMADC_VIM 0xfe00707f +#define MATCH_VMADC_VV 0x46000057 +#define MASK_VMADC_VV 0xfe00707f +#define MATCH_VMADC_VVM 0x44000057 +#define MASK_VMADC_VVM 0xfe00707f +#define MATCH_VMADC_VX 0x46004057 +#define MASK_VMADC_VX 0xfe00707f +#define MATCH_VMADC_VXM 
0x44004057 +#define MASK_VMADC_VXM 0xfe00707f +#define MATCH_VMADD_VV 0xa4002057 +#define MASK_VMADD_VV 0xfc00707f +#define MATCH_VMADD_VX 0xa4006057 +#define MASK_VMADD_VX 0xfc00707f +#define MATCH_VMAND_MM 0x64002057 +#define MASK_VMAND_MM 0xfc00707f +#define MATCH_VMANDN_MM 0x60002057 +#define MASK_VMANDN_MM 0xfc00707f +#define MATCH_VMAX_VV 0x1c000057 +#define MASK_VMAX_VV 0xfc00707f +#define MATCH_VMAX_VX 0x1c004057 +#define MASK_VMAX_VX 0xfc00707f +#define MATCH_VMAXU_VV 0x18000057 +#define MASK_VMAXU_VV 0xfc00707f +#define MATCH_VMAXU_VX 0x18004057 +#define MASK_VMAXU_VX 0xfc00707f +#define MATCH_VMERGE_VIM 0x5c003057 +#define MASK_VMERGE_VIM 0xfe00707f +#define MATCH_VMERGE_VVM 0x5c000057 +#define MASK_VMERGE_VVM 0xfe00707f +#define MATCH_VMERGE_VXM 0x5c004057 +#define MASK_VMERGE_VXM 0xfe00707f +#define MATCH_VMFEQ_VF 0x60005057 +#define MASK_VMFEQ_VF 0xfc00707f +#define MATCH_VMFEQ_VV 0x60001057 +#define MASK_VMFEQ_VV 0xfc00707f +#define MATCH_VMFGE_VF 0x7c005057 +#define MASK_VMFGE_VF 0xfc00707f +#define MATCH_VMFGT_VF 0x74005057 +#define MASK_VMFGT_VF 0xfc00707f +#define MATCH_VMFLE_VF 0x64005057 +#define MASK_VMFLE_VF 0xfc00707f +#define MATCH_VMFLE_VV 0x64001057 +#define MASK_VMFLE_VV 0xfc00707f +#define MATCH_VMFLT_VF 0x6c005057 +#define MASK_VMFLT_VF 0xfc00707f +#define MATCH_VMFLT_VV 0x6c001057 +#define MASK_VMFLT_VV 0xfc00707f +#define MATCH_VMFNE_VF 0x70005057 +#define MASK_VMFNE_VF 0xfc00707f +#define MATCH_VMFNE_VV 0x70001057 +#define MASK_VMFNE_VV 0xfc00707f +#define MATCH_VMIN_VV 0x14000057 +#define MASK_VMIN_VV 0xfc00707f +#define MATCH_VMIN_VX 0x14004057 +#define MASK_VMIN_VX 0xfc00707f +#define MATCH_VMINU_VV 0x10000057 +#define MASK_VMINU_VV 0xfc00707f +#define MATCH_VMINU_VX 0x10004057 +#define MASK_VMINU_VX 0xfc00707f +#define MATCH_VMNAND_MM 0x74002057 +#define MASK_VMNAND_MM 0xfc00707f +#define MATCH_VMNOR_MM 0x78002057 +#define MASK_VMNOR_MM 0xfc00707f +#define MATCH_VMOR_MM 0x68002057 +#define MASK_VMOR_MM 0xfc00707f +#define 
MATCH_VMORN_MM 0x70002057 +#define MASK_VMORN_MM 0xfc00707f +#define MATCH_VMSBC_VV 0x4e000057 +#define MASK_VMSBC_VV 0xfe00707f +#define MATCH_VMSBC_VVM 0x4c000057 +#define MASK_VMSBC_VVM 0xfe00707f +#define MATCH_VMSBC_VX 0x4e004057 +#define MASK_VMSBC_VX 0xfe00707f +#define MATCH_VMSBC_VXM 0x4c004057 +#define MASK_VMSBC_VXM 0xfe00707f +#define MATCH_VMSBF_M 0x5000a057 +#define MASK_VMSBF_M 0xfc0ff07f +#define MATCH_VMSEQ_VI 0x60003057 +#define MASK_VMSEQ_VI 0xfc00707f +#define MATCH_VMSEQ_VV 0x60000057 +#define MASK_VMSEQ_VV 0xfc00707f +#define MATCH_VMSEQ_VX 0x60004057 +#define MASK_VMSEQ_VX 0xfc00707f +#define MATCH_VMSGT_VI 0x7c003057 +#define MASK_VMSGT_VI 0xfc00707f +#define MATCH_VMSGT_VX 0x7c004057 +#define MASK_VMSGT_VX 0xfc00707f +#define MATCH_VMSGTU_VI 0x78003057 +#define MASK_VMSGTU_VI 0xfc00707f +#define MATCH_VMSGTU_VX 0x78004057 +#define MASK_VMSGTU_VX 0xfc00707f +#define MATCH_VMSIF_M 0x5001a057 +#define MASK_VMSIF_M 0xfc0ff07f +#define MATCH_VMSLE_VI 0x74003057 +#define MASK_VMSLE_VI 0xfc00707f +#define MATCH_VMSLE_VV 0x74000057 +#define MASK_VMSLE_VV 0xfc00707f +#define MATCH_VMSLE_VX 0x74004057 +#define MASK_VMSLE_VX 0xfc00707f +#define MATCH_VMSLEU_VI 0x70003057 +#define MASK_VMSLEU_VI 0xfc00707f +#define MATCH_VMSLEU_VV 0x70000057 +#define MASK_VMSLEU_VV 0xfc00707f +#define MATCH_VMSLEU_VX 0x70004057 +#define MASK_VMSLEU_VX 0xfc00707f +#define MATCH_VMSLT_VV 0x6c000057 +#define MASK_VMSLT_VV 0xfc00707f +#define MATCH_VMSLT_VX 0x6c004057 +#define MASK_VMSLT_VX 0xfc00707f +#define MATCH_VMSLTU_VV 0x68000057 +#define MASK_VMSLTU_VV 0xfc00707f +#define MATCH_VMSLTU_VX 0x68004057 +#define MASK_VMSLTU_VX 0xfc00707f +#define MATCH_VMSNE_VI 0x64003057 +#define MASK_VMSNE_VI 0xfc00707f +#define MATCH_VMSNE_VV 0x64000057 +#define MASK_VMSNE_VV 0xfc00707f +#define MATCH_VMSNE_VX 0x64004057 +#define MASK_VMSNE_VX 0xfc00707f +#define MATCH_VMSOF_M 0x50012057 +#define MASK_VMSOF_M 0xfc0ff07f +#define MATCH_VMUL_VV 0x94002057 +#define MASK_VMUL_VV 
0xfc00707f +#define MATCH_VMUL_VX 0x94006057 +#define MASK_VMUL_VX 0xfc00707f +#define MATCH_VMULH_VV 0x9c002057 +#define MASK_VMULH_VV 0xfc00707f +#define MATCH_VMULH_VX 0x9c006057 +#define MASK_VMULH_VX 0xfc00707f +#define MATCH_VMULHSU_VV 0x98002057 +#define MASK_VMULHSU_VV 0xfc00707f +#define MATCH_VMULHSU_VX 0x98006057 +#define MASK_VMULHSU_VX 0xfc00707f +#define MATCH_VMULHU_VV 0x90002057 +#define MASK_VMULHU_VV 0xfc00707f +#define MATCH_VMULHU_VX 0x90006057 +#define MASK_VMULHU_VX 0xfc00707f +#define MATCH_VMV1R_V 0x9e003057 +#define MASK_VMV1R_V 0xfe0ff07f +#define MATCH_VMV2R_V 0x9e00b057 +#define MASK_VMV2R_V 0xfe0ff07f +#define MATCH_VMV4R_V 0x9e01b057 +#define MASK_VMV4R_V 0xfe0ff07f +#define MATCH_VMV8R_V 0x9e03b057 +#define MASK_VMV8R_V 0xfe0ff07f +#define MATCH_VMV_S_X 0x42006057 +#define MASK_VMV_S_X 0xfff0707f +#define MATCH_VMV_V_I 0x5e003057 +#define MASK_VMV_V_I 0xfff0707f +#define MATCH_VMV_V_V 0x5e000057 +#define MASK_VMV_V_V 0xfff0707f +#define MATCH_VMV_V_X 0x5e004057 +#define MASK_VMV_V_X 0xfff0707f +#define MATCH_VMV_X_S 0x42002057 +#define MASK_VMV_X_S 0xfe0ff07f +#define MATCH_VMXNOR_MM 0x7c002057 +#define MASK_VMXNOR_MM 0xfc00707f +#define MATCH_VMXOR_MM 0x6c002057 +#define MASK_VMXOR_MM 0xfc00707f +#define MATCH_VNCLIP_WI 0xbc003057 +#define MASK_VNCLIP_WI 0xfc00707f +#define MATCH_VNCLIP_WV 0xbc000057 +#define MASK_VNCLIP_WV 0xfc00707f +#define MATCH_VNCLIP_WX 0xbc004057 +#define MASK_VNCLIP_WX 0xfc00707f +#define MATCH_VNCLIPU_WI 0xb8003057 +#define MASK_VNCLIPU_WI 0xfc00707f +#define MATCH_VNCLIPU_WV 0xb8000057 +#define MASK_VNCLIPU_WV 0xfc00707f +#define MATCH_VNCLIPU_WX 0xb8004057 +#define MASK_VNCLIPU_WX 0xfc00707f +#define MATCH_VNMSAC_VV 0xbc002057 +#define MASK_VNMSAC_VV 0xfc00707f +#define MATCH_VNMSAC_VX 0xbc006057 +#define MASK_VNMSAC_VX 0xfc00707f +#define MATCH_VNMSUB_VV 0xac002057 +#define MASK_VNMSUB_VV 0xfc00707f +#define MATCH_VNMSUB_VX 0xac006057 +#define MASK_VNMSUB_VX 0xfc00707f +#define MATCH_VNSRA_WI 0xb4003057 
+#define MASK_VNSRA_WI 0xfc00707f +#define MATCH_VNSRA_WV 0xb4000057 +#define MASK_VNSRA_WV 0xfc00707f +#define MATCH_VNSRA_WX 0xb4004057 +#define MASK_VNSRA_WX 0xfc00707f +#define MATCH_VNSRL_WI 0xb0003057 +#define MASK_VNSRL_WI 0xfc00707f +#define MATCH_VNSRL_WV 0xb0000057 +#define MASK_VNSRL_WV 0xfc00707f +#define MATCH_VNSRL_WX 0xb0004057 +#define MASK_VNSRL_WX 0xfc00707f +#define MATCH_VOR_VI 0x28003057 +#define MASK_VOR_VI 0xfc00707f +#define MATCH_VOR_VV 0x28000057 +#define MASK_VOR_VV 0xfc00707f +#define MATCH_VOR_VX 0x28004057 +#define MASK_VOR_VX 0xfc00707f +#define MATCH_VREDAND_VS 0x4002057 +#define MASK_VREDAND_VS 0xfc00707f +#define MATCH_VREDMAX_VS 0x1c002057 +#define MASK_VREDMAX_VS 0xfc00707f +#define MATCH_VREDMAXU_VS 0x18002057 +#define MASK_VREDMAXU_VS 0xfc00707f +#define MATCH_VREDMIN_VS 0x14002057 +#define MASK_VREDMIN_VS 0xfc00707f +#define MATCH_VREDMINU_VS 0x10002057 +#define MASK_VREDMINU_VS 0xfc00707f +#define MATCH_VREDOR_VS 0x8002057 +#define MASK_VREDOR_VS 0xfc00707f +#define MATCH_VREDSUM_VS 0x2057 +#define MASK_VREDSUM_VS 0xfc00707f +#define MATCH_VREDXOR_VS 0xc002057 +#define MASK_VREDXOR_VS 0xfc00707f +#define MATCH_VREM_VV 0x8c002057 +#define MASK_VREM_VV 0xfc00707f +#define MATCH_VREM_VX 0x8c006057 +#define MASK_VREM_VX 0xfc00707f +#define MATCH_VREMU_VV 0x88002057 +#define MASK_VREMU_VV 0xfc00707f +#define MATCH_VREMU_VX 0x88006057 +#define MASK_VREMU_VX 0xfc00707f +#define MATCH_VRGATHER_VI 0x30003057 +#define MASK_VRGATHER_VI 0xfc00707f +#define MATCH_VRGATHER_VV 0x30000057 +#define MASK_VRGATHER_VV 0xfc00707f +#define MATCH_VRGATHER_VX 0x30004057 +#define MASK_VRGATHER_VX 0xfc00707f +#define MATCH_VRGATHEREI16_VV 0x38000057 +#define MASK_VRGATHEREI16_VV 0xfc00707f +#define MATCH_VRSUB_VI 0xc003057 +#define MASK_VRSUB_VI 0xfc00707f +#define MATCH_VRSUB_VX 0xc004057 +#define MASK_VRSUB_VX 0xfc00707f +#define MATCH_VS1R_V 0x2800027 +#define MASK_VS1R_V 0xfff0707f +#define MATCH_VS2R_V 0x22800027 +#define MASK_VS2R_V 0xfff0707f 
+#define MATCH_VS4R_V 0x62800027 +#define MASK_VS4R_V 0xfff0707f +#define MATCH_VS8R_V 0xe2800027 +#define MASK_VS8R_V 0xfff0707f +#define MATCH_VSADD_VI 0x84003057 +#define MASK_VSADD_VI 0xfc00707f +#define MATCH_VSADD_VV 0x84000057 +#define MASK_VSADD_VV 0xfc00707f +#define MATCH_VSADD_VX 0x84004057 +#define MASK_VSADD_VX 0xfc00707f +#define MATCH_VSADDU_VI 0x80003057 +#define MASK_VSADDU_VI 0xfc00707f +#define MATCH_VSADDU_VV 0x80000057 +#define MASK_VSADDU_VV 0xfc00707f +#define MATCH_VSADDU_VX 0x80004057 +#define MASK_VSADDU_VX 0xfc00707f +#define MATCH_VSBC_VVM 0x48000057 +#define MASK_VSBC_VVM 0xfe00707f +#define MATCH_VSBC_VXM 0x48004057 +#define MASK_VSBC_VXM 0xfe00707f +#define MATCH_VSE1024_V 0x10007027 +#define MASK_VSE1024_V 0x1df0707f +#define MATCH_VSE128_V 0x10000027 +#define MASK_VSE128_V 0x1df0707f +#define MATCH_VSE16_V 0x5027 +#define MASK_VSE16_V 0x1df0707f +#define MATCH_VSE256_V 0x10005027 +#define MASK_VSE256_V 0x1df0707f +#define MATCH_VSE32_V 0x6027 +#define MASK_VSE32_V 0x1df0707f +#define MATCH_VSE512_V 0x10006027 +#define MASK_VSE512_V 0x1df0707f +#define MATCH_VSE64_V 0x7027 +#define MASK_VSE64_V 0x1df0707f +#define MATCH_VSE8_V 0x27 +#define MASK_VSE8_V 0x1df0707f +#define MATCH_VSETIVLI 0xc0007057 +#define MASK_VSETIVLI 0xc000707f +#define MATCH_VSETVL 0x80007057 +#define MASK_VSETVL 0xfe00707f +#define MATCH_VSETVLI 0x7057 +#define MASK_VSETVLI 0x8000707f +#define MATCH_VSEXT_VF2 0x4803a057 +#define MASK_VSEXT_VF2 0xfc0ff07f +#define MATCH_VSEXT_VF4 0x4802a057 +#define MASK_VSEXT_VF4 0xfc0ff07f +#define MATCH_VSEXT_VF8 0x4801a057 +#define MASK_VSEXT_VF8 0xfc0ff07f +#define MATCH_VSLIDE1DOWN_VX 0x3c006057 +#define MASK_VSLIDE1DOWN_VX 0xfc00707f +#define MATCH_VSLIDE1UP_VX 0x38006057 +#define MASK_VSLIDE1UP_VX 0xfc00707f +#define MATCH_VSLIDEDOWN_VI 0x3c003057 +#define MASK_VSLIDEDOWN_VI 0xfc00707f +#define MATCH_VSLIDEDOWN_VX 0x3c004057 +#define MASK_VSLIDEDOWN_VX 0xfc00707f +#define MATCH_VSLIDEUP_VI 0x38003057 +#define 
MASK_VSLIDEUP_VI 0xfc00707f +#define MATCH_VSLIDEUP_VX 0x38004057 +#define MASK_VSLIDEUP_VX 0xfc00707f +#define MATCH_VSLL_VI 0x94003057 +#define MASK_VSLL_VI 0xfc00707f +#define MATCH_VSLL_VV 0x94000057 +#define MASK_VSLL_VV 0xfc00707f +#define MATCH_VSLL_VX 0x94004057 +#define MASK_VSLL_VX 0xfc00707f +#define MATCH_VSM_V 0x2b00027 +#define MASK_VSM_V 0xfff0707f +#define MATCH_VSMUL_VV 0x9c000057 +#define MASK_VSMUL_VV 0xfc00707f +#define MATCH_VSMUL_VX 0x9c004057 +#define MASK_VSMUL_VX 0xfc00707f +#define MATCH_VSOXEI1024_V 0x1c007027 +#define MASK_VSOXEI1024_V 0x1c00707f +#define MATCH_VSOXEI128_V 0x1c000027 +#define MASK_VSOXEI128_V 0x1c00707f +#define MATCH_VSOXEI16_V 0xc005027 +#define MASK_VSOXEI16_V 0x1c00707f +#define MATCH_VSOXEI256_V 0x1c005027 +#define MASK_VSOXEI256_V 0x1c00707f +#define MATCH_VSOXEI32_V 0xc006027 +#define MASK_VSOXEI32_V 0x1c00707f +#define MATCH_VSOXEI512_V 0x1c006027 +#define MASK_VSOXEI512_V 0x1c00707f +#define MATCH_VSOXEI64_V 0xc007027 +#define MASK_VSOXEI64_V 0x1c00707f +#define MATCH_VSOXEI8_V 0xc000027 +#define MASK_VSOXEI8_V 0x1c00707f +#define MATCH_VSRA_VI 0xa4003057 +#define MASK_VSRA_VI 0xfc00707f +#define MATCH_VSRA_VV 0xa4000057 +#define MASK_VSRA_VV 0xfc00707f +#define MATCH_VSRA_VX 0xa4004057 +#define MASK_VSRA_VX 0xfc00707f +#define MATCH_VSRL_VI 0xa0003057 +#define MASK_VSRL_VI 0xfc00707f +#define MATCH_VSRL_VV 0xa0000057 +#define MASK_VSRL_VV 0xfc00707f +#define MATCH_VSRL_VX 0xa0004057 +#define MASK_VSRL_VX 0xfc00707f +#define MATCH_VSSE1024_V 0x18007027 +#define MASK_VSSE1024_V 0x1c00707f +#define MATCH_VSSE128_V 0x18000027 +#define MASK_VSSE128_V 0x1c00707f +#define MATCH_VSSE16_V 0x8005027 +#define MASK_VSSE16_V 0x1c00707f +#define MATCH_VSSE256_V 0x18005027 +#define MASK_VSSE256_V 0x1c00707f +#define MATCH_VSSE32_V 0x8006027 +#define MASK_VSSE32_V 0x1c00707f +#define MATCH_VSSE512_V 0x18006027 +#define MASK_VSSE512_V 0x1c00707f +#define MATCH_VSSE64_V 0x8007027 +#define MASK_VSSE64_V 0x1c00707f +#define 
MATCH_VSSE8_V 0x8000027 +#define MASK_VSSE8_V 0x1c00707f +#define MATCH_VSSRA_VI 0xac003057 +#define MASK_VSSRA_VI 0xfc00707f +#define MATCH_VSSRA_VV 0xac000057 +#define MASK_VSSRA_VV 0xfc00707f +#define MATCH_VSSRA_VX 0xac004057 +#define MASK_VSSRA_VX 0xfc00707f +#define MATCH_VSSRL_VI 0xa8003057 +#define MASK_VSSRL_VI 0xfc00707f +#define MATCH_VSSRL_VV 0xa8000057 +#define MASK_VSSRL_VV 0xfc00707f +#define MATCH_VSSRL_VX 0xa8004057 +#define MASK_VSSRL_VX 0xfc00707f +#define MATCH_VSSUB_VV 0x8c000057 +#define MASK_VSSUB_VV 0xfc00707f +#define MATCH_VSSUB_VX 0x8c004057 +#define MASK_VSSUB_VX 0xfc00707f +#define MATCH_VSSUBU_VV 0x88000057 +#define MASK_VSSUBU_VV 0xfc00707f +#define MATCH_VSSUBU_VX 0x88004057 +#define MASK_VSSUBU_VX 0xfc00707f +#define MATCH_VSUB_VV 0x8000057 +#define MASK_VSUB_VV 0xfc00707f +#define MATCH_VSUB_VX 0x8004057 +#define MASK_VSUB_VX 0xfc00707f +#define MATCH_VSUXEI1024_V 0x14007027 +#define MASK_VSUXEI1024_V 0x1c00707f +#define MATCH_VSUXEI128_V 0x14000027 +#define MASK_VSUXEI128_V 0x1c00707f +#define MATCH_VSUXEI16_V 0x4005027 +#define MASK_VSUXEI16_V 0x1c00707f +#define MATCH_VSUXEI256_V 0x14005027 +#define MASK_VSUXEI256_V 0x1c00707f +#define MATCH_VSUXEI32_V 0x4006027 +#define MASK_VSUXEI32_V 0x1c00707f +#define MATCH_VSUXEI512_V 0x14006027 +#define MASK_VSUXEI512_V 0x1c00707f +#define MATCH_VSUXEI64_V 0x4007027 +#define MASK_VSUXEI64_V 0x1c00707f +#define MATCH_VSUXEI8_V 0x4000027 +#define MASK_VSUXEI8_V 0x1c00707f +#define MATCH_VWADD_VV 0xc4002057 +#define MASK_VWADD_VV 0xfc00707f +#define MATCH_VWADD_VX 0xc4006057 +#define MASK_VWADD_VX 0xfc00707f +#define MATCH_VWADD_WV 0xd4002057 +#define MASK_VWADD_WV 0xfc00707f +#define MATCH_VWADD_WX 0xd4006057 +#define MASK_VWADD_WX 0xfc00707f +#define MATCH_VWADDU_VV 0xc0002057 +#define MASK_VWADDU_VV 0xfc00707f +#define MATCH_VWADDU_VX 0xc0006057 +#define MASK_VWADDU_VX 0xfc00707f +#define MATCH_VWADDU_WV 0xd0002057 +#define MASK_VWADDU_WV 0xfc00707f +#define MATCH_VWADDU_WX 0xd0006057 
+#define MASK_VWADDU_WX 0xfc00707f +#define MATCH_VWMACC_VV 0xf4002057 +#define MASK_VWMACC_VV 0xfc00707f +#define MATCH_VWMACC_VX 0xf4006057 +#define MASK_VWMACC_VX 0xfc00707f +#define MATCH_VWMACCSU_VV 0xfc002057 +#define MASK_VWMACCSU_VV 0xfc00707f +#define MATCH_VWMACCSU_VX 0xfc006057 +#define MASK_VWMACCSU_VX 0xfc00707f +#define MATCH_VWMACCU_VV 0xf0002057 +#define MASK_VWMACCU_VV 0xfc00707f +#define MATCH_VWMACCU_VX 0xf0006057 +#define MASK_VWMACCU_VX 0xfc00707f +#define MATCH_VWMACCUS_VX 0xf8006057 +#define MASK_VWMACCUS_VX 0xfc00707f +#define MATCH_VWMUL_VV 0xec002057 +#define MASK_VWMUL_VV 0xfc00707f +#define MATCH_VWMUL_VX 0xec006057 +#define MASK_VWMUL_VX 0xfc00707f +#define MATCH_VWMULSU_VV 0xe8002057 +#define MASK_VWMULSU_VV 0xfc00707f +#define MATCH_VWMULSU_VX 0xe8006057 +#define MASK_VWMULSU_VX 0xfc00707f +#define MATCH_VWMULU_VV 0xe0002057 +#define MASK_VWMULU_VV 0xfc00707f +#define MATCH_VWMULU_VX 0xe0006057 +#define MASK_VWMULU_VX 0xfc00707f +#define MATCH_VWREDSUM_VS 0xc4000057 +#define MASK_VWREDSUM_VS 0xfc00707f +#define MATCH_VWREDSUMU_VS 0xc0000057 +#define MASK_VWREDSUMU_VS 0xfc00707f +#define MATCH_VWSUB_VV 0xcc002057 +#define MASK_VWSUB_VV 0xfc00707f +#define MATCH_VWSUB_VX 0xcc006057 +#define MASK_VWSUB_VX 0xfc00707f +#define MATCH_VWSUB_WV 0xdc002057 +#define MASK_VWSUB_WV 0xfc00707f +#define MATCH_VWSUB_WX 0xdc006057 +#define MASK_VWSUB_WX 0xfc00707f +#define MATCH_VWSUBU_VV 0xc8002057 +#define MASK_VWSUBU_VV 0xfc00707f +#define MATCH_VWSUBU_VX 0xc8006057 +#define MASK_VWSUBU_VX 0xfc00707f +#define MATCH_VWSUBU_WV 0xd8002057 +#define MASK_VWSUBU_WV 0xfc00707f +#define MATCH_VWSUBU_WX 0xd8006057 +#define MASK_VWSUBU_WX 0xfc00707f +#define MATCH_VXOR_VI 0x2c003057 +#define MASK_VXOR_VI 0xfc00707f +#define MATCH_VXOR_VV 0x2c000057 +#define MASK_VXOR_VV 0xfc00707f +#define MATCH_VXOR_VX 0x2c004057 +#define MASK_VXOR_VX 0xfc00707f +#define MATCH_VZEXT_VF2 0x48032057 +#define MASK_VZEXT_VF2 0xfc0ff07f +#define MATCH_VZEXT_VF4 0x48022057 
+#define MASK_VZEXT_VF4 0xfc0ff07f +#define MATCH_VZEXT_VF8 0x48012057 +#define MASK_VZEXT_VF8 0xfc0ff07f +#define MATCH_WFI 0x10500073 +#define MASK_WFI 0xffffffff +#define MATCH_WRS_NTO 0xd00073 +#define MASK_WRS_NTO 0xffffffff +#define MATCH_WRS_STO 0x1d00073 +#define MASK_WRS_STO 0xffffffff +#define MATCH_XNOR 0x40004033 +#define MASK_XNOR 0xfe00707f +#define MATCH_XOR 0x4033 +#define MASK_XOR 0xfe00707f +#define MATCH_XORI 0x4013 +#define MASK_XORI 0x707f +#define MATCH_XPERM16 0x28006033 +#define MASK_XPERM16 0xfe00707f +#define MATCH_XPERM32 0x28000033 +#define MASK_XPERM32 0xfe00707f +#define MATCH_XPERM4 0x28002033 +#define MASK_XPERM4 0xfe00707f +#define MATCH_XPERM8 0x28004033 +#define MASK_XPERM8 0xfe00707f +#define MATCH_ZUNPKD810 0xacc00077 +#define MASK_ZUNPKD810 0xfff0707f +#define MATCH_ZUNPKD820 0xacd00077 +#define MASK_ZUNPKD820 0xfff0707f +#define MATCH_ZUNPKD830 0xace00077 +#define MASK_ZUNPKD830 0xfff0707f +#define MATCH_ZUNPKD831 0xacf00077 +#define MASK_ZUNPKD831 0xfff0707f +#define MATCH_ZUNPKD832 0xad700077 +#define MASK_ZUNPKD832 0xfff0707f + +#define CSR_FFLAGS 0x1 +#define CSR_FRM 0x2 +#define CSR_FCSR 0x3 +#define CSR_VSTART 0x8 +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa +#define CSR_VCSR 0xf +#define CSR_SEED 0x15 +#define CSR_JVT 0x17 +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 
+#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 +#define CSR_VLENB 0xc22 +#define CSR_SSTATUS 0x100 +#define CSR_SEDELEG 0x102 +#define CSR_SIDELEG 0x103 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SCOUNTEREN 0x106 +#define CSR_SENVCFG 0x10a +#define CSR_SSTATEEN0 0x10c +#define CSR_SSTATEEN1 0x10d +#define CSR_SSTATEEN2 0x10e +#define CSR_SSTATEEN3 0x10f +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_STVAL 0x143 +#define CSR_SIP 0x144 +#define CSR_STIMECMP 0x14d +#define CSR_SISELECT 0x150 +#define CSR_SIREG 0x151 +#define CSR_STOPEI 0x15c +#define CSR_SATP 0x180 +#define CSR_SCONTEXT 0x5a8 +#define CSR_VSSTATUS 0x200 +#define CSR_VSIE 0x204 +#define CSR_VSTVEC 0x205 +#define CSR_VSSCRATCH 0x240 +#define CSR_VSEPC 0x241 +#define CSR_VSCAUSE 0x242 +#define CSR_VSTVAL 0x243 +#define CSR_VSIP 0x244 +#define CSR_VSTIMECMP 0x24d +#define CSR_VSISELECT 0x250 +#define CSR_VSIREG 0x251 +#define CSR_VSTOPEI 0x25c +#define CSR_VSATP 0x280 +#define CSR_HSTATUS 0x600 +#define CSR_HEDELEG 0x602 +#define CSR_HIDELEG 0x603 +#define CSR_HIE 0x604 +#define CSR_HTIMEDELTA 0x605 +#define CSR_HCOUNTEREN 0x606 +#define CSR_HGEIE 0x607 +#define CSR_HVIEN 0x608 +#define CSR_HVICTL 0x609 +#define CSR_HENVCFG 0x60a +#define CSR_HSTATEEN0 0x60c +#define CSR_HSTATEEN1 0x60d +#define CSR_HSTATEEN2 0x60e +#define CSR_HSTATEEN3 0x60f +#define CSR_HTVAL 0x643 +#define CSR_HIP 0x644 +#define CSR_HVIP 0x645 +#define CSR_HVIPRIO1 0x646 +#define CSR_HVIPRIO2 0x647 +#define CSR_HTINST 0x64a +#define CSR_HGATP 0x680 +#define CSR_HCONTEXT 0x6a8 +#define CSR_HGEIP 0xe12 +#define CSR_VSTOPI 0xeb0 +#define CSR_SCOUNTOVF 0xda0 +#define CSR_STOPI 0xdb0 +#define 
CSR_UTVT 0x7 +#define CSR_UNXTI 0x45 +#define CSR_UINTSTATUS 0x46 +#define CSR_USCRATCHCSW 0x48 +#define CSR_USCRATCHCSWL 0x49 +#define CSR_STVT 0x107 +#define CSR_SNXTI 0x145 +#define CSR_SINTSTATUS 0x146 +#define CSR_SSCRATCHCSW 0x148 +#define CSR_SSCRATCHCSWL 0x149 +#define CSR_MTVT 0x307 +#define CSR_MNXTI 0x345 +#define CSR_MINTSTATUS 0x346 +#define CSR_MSCRATCHCSW 0x348 +#define CSR_MSCRATCHCSWL 0x349 +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 +#define CSR_MVIEN 0x308 +#define CSR_MVIP 0x309 +#define CSR_MENVCFG 0x30a +#define CSR_MSTATEEN0 0x30c +#define CSR_MSTATEEN1 0x30d +#define CSR_MSTATEEN2 0x30e +#define CSR_MSTATEEN3 0x30f +#define CSR_MCOUNTINHIBIT 0x320 +#define CSR_MSCRATCH 0x340 +#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MTVAL 0x343 +#define CSR_MIP 0x344 +#define CSR_MTINST 0x34a +#define CSR_MTVAL2 0x34b +#define CSR_MISELECT 0x350 +#define CSR_MIREG 0x351 +#define CSR_MTOPEI 0x35c +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 
+#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef +#define CSR_MSECCFG 0x747 +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 +#define CSR_TINFO 0x7a4 +#define CSR_TCONTROL 0x7a5 +#define CSR_MCONTEXT 0x7a8 +#define CSR_MSCONTEXT 0x7aa +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH0 0x7b2 +#define CSR_DSCRATCH1 0x7b3 +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define 
CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 +#define CSR_MCONFIGPTR 0xf15 +#define CSR_MTOPI 0xfb0 +#define CSR_SIEH 0x114 +#define CSR_SIPH 0x154 +#define CSR_STIMECMPH 0x15d +#define CSR_VSIEH 0x214 +#define CSR_VSIPH 0x254 +#define CSR_VSTIMECMPH 0x25d +#define CSR_HTIMEDELTAH 0x615 +#define CSR_HIDELEGH 0x613 +#define 
CSR_HVIENH 0x618 +#define CSR_HENVCFGH 0x61a +#define CSR_HVIPH 0x655 +#define CSR_HVIPRIO1H 0x656 +#define CSR_HVIPRIO2H 0x657 +#define CSR_HSTATEEN0H 0x61c +#define CSR_HSTATEEN1H 0x61d +#define CSR_HSTATEEN2H 0x61e +#define CSR_HSTATEEN3H 0x61f +#define CSR_CYCLEH 0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f +#define CSR_MSTATUSH 0x310 +#define CSR_MIDELEGH 0x313 +#define CSR_MIEH 0x314 +#define CSR_MVIENH 0x318 +#define CSR_MVIPH 0x319 +#define CSR_MENVCFGH 0x31a +#define CSR_MSTATEEN0H 0x31c +#define CSR_MSTATEEN1H 0x31d +#define CSR_MSTATEEN2H 0x31e +#define CSR_MSTATEEN3H 0x31f +#define CSR_MIPH 0x354 +#define CSR_MHPMEVENT3H 0x723 +#define CSR_MHPMEVENT4H 0x724 +#define CSR_MHPMEVENT5H 0x725 +#define CSR_MHPMEVENT6H 0x726 +#define CSR_MHPMEVENT7H 0x727 +#define CSR_MHPMEVENT8H 0x728 +#define CSR_MHPMEVENT9H 0x729 +#define CSR_MHPMEVENT10H 0x72a +#define CSR_MHPMEVENT11H 0x72b +#define CSR_MHPMEVENT12H 0x72c +#define CSR_MHPMEVENT13H 0x72d +#define CSR_MHPMEVENT14H 0x72e +#define CSR_MHPMEVENT15H 0x72f +#define 
CSR_MHPMEVENT16H 0x730 +#define CSR_MHPMEVENT17H 0x731 +#define CSR_MHPMEVENT18H 0x732 +#define CSR_MHPMEVENT19H 0x733 +#define CSR_MHPMEVENT20H 0x734 +#define CSR_MHPMEVENT21H 0x735 +#define CSR_MHPMEVENT22H 0x736 +#define CSR_MHPMEVENT23H 0x737 +#define CSR_MHPMEVENT24H 0x738 +#define CSR_MHPMEVENT25H 0x739 +#define CSR_MHPMEVENT26H 0x73a +#define CSR_MHPMEVENT27H 0x73b +#define CSR_MHPMEVENT28H 0x73c +#define CSR_MHPMEVENT29H 0x73d +#define CSR_MHPMEVENT30H 0x73e +#define CSR_MHPMEVENT31H 0x73f +#define CSR_MNSCRATCH 0x740 +#define CSR_MNEPC 0x741 +#define CSR_MNCAUSE 0x742 +#define CSR_MNSTATUS 0x744 +#define CSR_MSECCFGH 0x757 +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +#define CAUSE_MISALIGNED_FETCH 0x0 +#define CAUSE_FETCH_ACCESS 0x1 +#define CAUSE_ILLEGAL_INSTRUCTION 0x2 +#define CAUSE_BREAKPOINT 0x3 +#define CAUSE_MISALIGNED_LOAD 0x4 +#define CAUSE_LOAD_ACCESS 0x5 +#define CAUSE_MISALIGNED_STORE 0x6 +#define CAUSE_STORE_ACCESS 0x7 +#define CAUSE_USER_ECALL 0x8 +#define 
CAUSE_SUPERVISOR_ECALL 0x9 +#define CAUSE_VIRTUAL_SUPERVISOR_ECALL 0xa +#define CAUSE_MACHINE_ECALL 0xb +#define CAUSE_FETCH_PAGE_FAULT 0xc +#define CAUSE_LOAD_PAGE_FAULT 0xd +#define CAUSE_STORE_PAGE_FAULT 0xf +#define CAUSE_FETCH_GUEST_PAGE_FAULT 0x14 +#define CAUSE_LOAD_GUEST_PAGE_FAULT 0x15 +#define CAUSE_VIRTUAL_INSTRUCTION 0x16 +#define CAUSE_STORE_GUEST_PAGE_FAULT 0x17 + +#define INSN_FIELD_RD 0xf80 +#define INSN_FIELD_RT 0xf8000 +#define INSN_FIELD_RS1 0xf8000 +#define INSN_FIELD_RS2 0x1f00000 +#define INSN_FIELD_RS3 0xf8000000 +#define INSN_FIELD_AQRL 0x6000000 +#define INSN_FIELD_AQ 0x4000000 +#define INSN_FIELD_RL 0x2000000 +#define INSN_FIELD_FM 0xf0000000 +#define INSN_FIELD_PRED 0xf000000 +#define INSN_FIELD_SUCC 0xf00000 +#define INSN_FIELD_RM 0x7000 +#define INSN_FIELD_FUNCT3 0x7000 +#define INSN_FIELD_FUNCT2 0x6000000 +#define INSN_FIELD_IMM20 0xfffff000 +#define INSN_FIELD_JIMM20 0xfffff000 +#define INSN_FIELD_IMM12 0xfff00000 +#define INSN_FIELD_CSR 0xfff00000 +#define INSN_FIELD_IMM12HI 0xfe000000 +#define INSN_FIELD_BIMM12HI 0xfe000000 +#define INSN_FIELD_IMM12LO 0xf80 +#define INSN_FIELD_BIMM12LO 0xf80 +#define INSN_FIELD_ZIMM 0xf8000 +#define INSN_FIELD_SHAMTQ 0x7f00000 +#define INSN_FIELD_SHAMTW 0x1f00000 +#define INSN_FIELD_SHAMTW4 0xf00000 +#define INSN_FIELD_SHAMTD 0x3f00000 +#define INSN_FIELD_BS 0xc0000000 +#define INSN_FIELD_RNUM 0xf00000 +#define INSN_FIELD_RC 0x3e000000 +#define INSN_FIELD_IMM2 0x300000 +#define INSN_FIELD_IMM3 0x700000 +#define INSN_FIELD_IMM4 0xf00000 +#define INSN_FIELD_IMM5 0x1f00000 +#define INSN_FIELD_IMM6 0x3f00000 +#define INSN_FIELD_OPCODE 0x7f +#define INSN_FIELD_FUNCT7 0xfe000000 +#define INSN_FIELD_VD 0xf80 +#define INSN_FIELD_VS3 0xf80 +#define INSN_FIELD_VS1 0xf8000 +#define INSN_FIELD_VS2 0x1f00000 +#define INSN_FIELD_VM 0x2000000 +#define INSN_FIELD_WD 0x4000000 +#define INSN_FIELD_AMOOP 0xf8000000 +#define INSN_FIELD_NF 0xe0000000 +#define INSN_FIELD_SIMM5 0xf8000 +#define INSN_FIELD_ZIMM10 
0x3ff00000 +#define INSN_FIELD_ZIMM11 0x7ff00000 +#define INSN_FIELD_C_NZUIMM10 0x1fe0 +#define INSN_FIELD_C_UIMM7LO 0x60 +#define INSN_FIELD_C_UIMM7HI 0x1c00 +#define INSN_FIELD_C_UIMM8LO 0x60 +#define INSN_FIELD_C_UIMM8HI 0x1c00 +#define INSN_FIELD_C_UIMM9LO 0x60 +#define INSN_FIELD_C_UIMM9HI 0x1c00 +#define INSN_FIELD_C_NZIMM6LO 0x7c +#define INSN_FIELD_C_NZIMM6HI 0x1000 +#define INSN_FIELD_C_IMM6LO 0x7c +#define INSN_FIELD_C_IMM6HI 0x1000 +#define INSN_FIELD_C_NZIMM10HI 0x1000 +#define INSN_FIELD_C_NZIMM10LO 0x7c +#define INSN_FIELD_C_NZIMM18HI 0x1000 +#define INSN_FIELD_C_NZIMM18LO 0x7c +#define INSN_FIELD_C_IMM12 0x1ffc +#define INSN_FIELD_C_BIMM9LO 0x7c +#define INSN_FIELD_C_BIMM9HI 0x1c00 +#define INSN_FIELD_C_NZUIMM5 0x7c +#define INSN_FIELD_C_NZUIMM6LO 0x7c +#define INSN_FIELD_C_NZUIMM6HI 0x1000 +#define INSN_FIELD_C_UIMM8SPLO 0x7c +#define INSN_FIELD_C_UIMM8SPHI 0x1000 +#define INSN_FIELD_C_UIMM8SP_S 0x1f80 +#define INSN_FIELD_C_UIMM10SPLO 0x7c +#define INSN_FIELD_C_UIMM10SPHI 0x1000 +#define INSN_FIELD_C_UIMM9SPLO 0x7c +#define INSN_FIELD_C_UIMM9SPHI 0x1000 +#define INSN_FIELD_C_UIMM10SP_S 0x1f80 +#define INSN_FIELD_C_UIMM9SP_S 0x1f80 +#define INSN_FIELD_C_UIMM2 0x60 +#define INSN_FIELD_C_UIMM1 0x20 +#define INSN_FIELD_C_RLIST 0xf0 +#define INSN_FIELD_C_SPIMM 0xc +#define INSN_FIELD_C_INDEX 0x3fc +#define INSN_FIELD_RS1_P 0x380 +#define INSN_FIELD_RS2_P 0x1c +#define INSN_FIELD_RD_P 0x1c +#define INSN_FIELD_RD_RS1_N0 0xf80 +#define INSN_FIELD_RD_RS1_P 0x380 +#define INSN_FIELD_RD_RS1 0xf80 +#define INSN_FIELD_RD_N2 0xf80 +#define INSN_FIELD_RD_N0 0xf80 +#define INSN_FIELD_RS1_N0 0xf80 +#define INSN_FIELD_C_RS2_N0 0x7c +#define INSN_FIELD_C_RS1_N0 0xf80 +#define INSN_FIELD_C_RS2 0x7c +#define INSN_FIELD_C_SREG1 0x380 +#define INSN_FIELD_C_SREG2 0x1c +#endif +#ifdef DECLARE_INSN +DECLARE_INSN(add, MATCH_ADD, MASK_ADD) +DECLARE_INSN(add16, MATCH_ADD16, MASK_ADD16) +DECLARE_INSN(add32, MATCH_ADD32, MASK_ADD32) +DECLARE_INSN(add64, MATCH_ADD64, MASK_ADD64) 
+DECLARE_INSN(add8, MATCH_ADD8, MASK_ADD8) +DECLARE_INSN(add_uw, MATCH_ADD_UW, MASK_ADD_UW) +DECLARE_INSN(addi, MATCH_ADDI, MASK_ADDI) +DECLARE_INSN(addiw, MATCH_ADDIW, MASK_ADDIW) +DECLARE_INSN(addw, MATCH_ADDW, MASK_ADDW) +DECLARE_INSN(aes32dsi, MATCH_AES32DSI, MASK_AES32DSI) +DECLARE_INSN(aes32dsmi, MATCH_AES32DSMI, MASK_AES32DSMI) +DECLARE_INSN(aes32esi, MATCH_AES32ESI, MASK_AES32ESI) +DECLARE_INSN(aes32esmi, MATCH_AES32ESMI, MASK_AES32ESMI) +DECLARE_INSN(aes64ds, MATCH_AES64DS, MASK_AES64DS) +DECLARE_INSN(aes64dsm, MATCH_AES64DSM, MASK_AES64DSM) +DECLARE_INSN(aes64es, MATCH_AES64ES, MASK_AES64ES) +DECLARE_INSN(aes64esm, MATCH_AES64ESM, MASK_AES64ESM) +DECLARE_INSN(aes64im, MATCH_AES64IM, MASK_AES64IM) +DECLARE_INSN(aes64ks1i, MATCH_AES64KS1I, MASK_AES64KS1I) +DECLARE_INSN(aes64ks2, MATCH_AES64KS2, MASK_AES64KS2) +DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) +DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) +DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) +DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) +DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) +DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) +DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) +DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) +DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) +DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) +DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) +DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) +DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) +DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) +DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) +DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) +DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) +DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) +DECLARE_INSN(and, MATCH_AND, MASK_AND) +DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) +DECLARE_INSN(andn, MATCH_ANDN, MASK_ANDN) +DECLARE_INSN(auipc, MATCH_AUIPC, 
MASK_AUIPC) +DECLARE_INSN(ave, MATCH_AVE, MASK_AVE) +DECLARE_INSN(bclr, MATCH_BCLR, MASK_BCLR) +DECLARE_INSN(bclri, MATCH_BCLRI, MASK_BCLRI) +DECLARE_INSN(bcompress, MATCH_BCOMPRESS, MASK_BCOMPRESS) +DECLARE_INSN(bcompressw, MATCH_BCOMPRESSW, MASK_BCOMPRESSW) +DECLARE_INSN(bdecompress, MATCH_BDECOMPRESS, MASK_BDECOMPRESS) +DECLARE_INSN(bdecompressw, MATCH_BDECOMPRESSW, MASK_BDECOMPRESSW) +DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) +DECLARE_INSN(bext, MATCH_BEXT, MASK_BEXT) +DECLARE_INSN(bexti, MATCH_BEXTI, MASK_BEXTI) +DECLARE_INSN(bfp, MATCH_BFP, MASK_BFP) +DECLARE_INSN(bfpw, MATCH_BFPW, MASK_BFPW) +DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) +DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) +DECLARE_INSN(binv, MATCH_BINV, MASK_BINV) +DECLARE_INSN(binvi, MATCH_BINVI, MASK_BINVI) +DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) +DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) +DECLARE_INSN(bmatflip, MATCH_BMATFLIP, MASK_BMATFLIP) +DECLARE_INSN(bmator, MATCH_BMATOR, MASK_BMATOR) +DECLARE_INSN(bmatxor, MATCH_BMATXOR, MASK_BMATXOR) +DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) +DECLARE_INSN(bset, MATCH_BSET, MASK_BSET) +DECLARE_INSN(bseti, MATCH_BSETI, MASK_BSETI) +DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) +DECLARE_INSN(c_addi, MATCH_C_ADDI, MASK_C_ADDI) +DECLARE_INSN(c_addi16sp, MATCH_C_ADDI16SP, MASK_C_ADDI16SP) +DECLARE_INSN(c_addi4spn, MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN) +DECLARE_INSN(c_addiw, MATCH_C_ADDIW, MASK_C_ADDIW) +DECLARE_INSN(c_addw, MATCH_C_ADDW, MASK_C_ADDW) +DECLARE_INSN(c_and, MATCH_C_AND, MASK_C_AND) +DECLARE_INSN(c_andi, MATCH_C_ANDI, MASK_C_ANDI) +DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) +DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) +DECLARE_INSN(c_ebreak, MATCH_C_EBREAK, MASK_C_EBREAK) +DECLARE_INSN(c_fld, MATCH_C_FLD, MASK_C_FLD) +DECLARE_INSN(c_fldsp, MATCH_C_FLDSP, MASK_C_FLDSP) +DECLARE_INSN(c_flw, MATCH_C_FLW, MASK_C_FLW) +DECLARE_INSN(c_flwsp, MATCH_C_FLWSP, MASK_C_FLWSP) +DECLARE_INSN(c_fsd, MATCH_C_FSD, MASK_C_FSD) +DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, 
MASK_C_FSDSP) +DECLARE_INSN(c_fsw, MATCH_C_FSW, MASK_C_FSW) +DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) +DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) +DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) +DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) +DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) +DECLARE_INSN(c_lbu, MATCH_C_LBU, MASK_C_LBU) +DECLARE_INSN(c_ld, MATCH_C_LD, MASK_C_LD) +DECLARE_INSN(c_ldsp, MATCH_C_LDSP, MASK_C_LDSP) +DECLARE_INSN(c_lh, MATCH_C_LH, MASK_C_LH) +DECLARE_INSN(c_lhu, MATCH_C_LHU, MASK_C_LHU) +DECLARE_INSN(c_li, MATCH_C_LI, MASK_C_LI) +DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) +DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) +DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) +DECLARE_INSN(c_mul, MATCH_C_MUL, MASK_C_MUL) +DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) +DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) +DECLARE_INSN(c_not, MATCH_C_NOT, MASK_C_NOT) +DECLARE_INSN(c_or, MATCH_C_OR, MASK_C_OR) +DECLARE_INSN(c_sb, MATCH_C_SB, MASK_C_SB) +DECLARE_INSN(c_sd, MATCH_C_SD, MASK_C_SD) +DECLARE_INSN(c_sdsp, MATCH_C_SDSP, MASK_C_SDSP) +DECLARE_INSN(c_sext_b, MATCH_C_SEXT_B, MASK_C_SEXT_B) +DECLARE_INSN(c_sext_h, MATCH_C_SEXT_H, MASK_C_SEXT_H) +DECLARE_INSN(c_sh, MATCH_C_SH, MASK_C_SH) +DECLARE_INSN(c_slli, MATCH_C_SLLI, MASK_C_SLLI) +DECLARE_INSN(c_srai, MATCH_C_SRAI, MASK_C_SRAI) +DECLARE_INSN(c_srli, MATCH_C_SRLI, MASK_C_SRLI) +DECLARE_INSN(c_sub, MATCH_C_SUB, MASK_C_SUB) +DECLARE_INSN(c_subw, MATCH_C_SUBW, MASK_C_SUBW) +DECLARE_INSN(c_sw, MATCH_C_SW, MASK_C_SW) +DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) +DECLARE_INSN(c_xor, MATCH_C_XOR, MASK_C_XOR) +DECLARE_INSN(c_zext_b, MATCH_C_ZEXT_B, MASK_C_ZEXT_B) +DECLARE_INSN(c_zext_h, MATCH_C_ZEXT_H, MASK_C_ZEXT_H) +DECLARE_INSN(c_zext_w, MATCH_C_ZEXT_W, MASK_C_ZEXT_W) +DECLARE_INSN(cbo_clean, MATCH_CBO_CLEAN, MASK_CBO_CLEAN) +DECLARE_INSN(cbo_flush, MATCH_CBO_FLUSH, MASK_CBO_FLUSH) +DECLARE_INSN(cbo_inval, MATCH_CBO_INVAL, MASK_CBO_INVAL) +DECLARE_INSN(cbo_zero, MATCH_CBO_ZERO, MASK_CBO_ZERO) 
+DECLARE_INSN(clmul, MATCH_CLMUL, MASK_CLMUL) +DECLARE_INSN(clmulh, MATCH_CLMULH, MASK_CLMULH) +DECLARE_INSN(clmulr, MATCH_CLMULR, MASK_CLMULR) +DECLARE_INSN(clrs16, MATCH_CLRS16, MASK_CLRS16) +DECLARE_INSN(clrs32, MATCH_CLRS32, MASK_CLRS32) +DECLARE_INSN(clrs8, MATCH_CLRS8, MASK_CLRS8) +DECLARE_INSN(clz, MATCH_CLZ, MASK_CLZ) +DECLARE_INSN(clz16, MATCH_CLZ16, MASK_CLZ16) +DECLARE_INSN(clz32, MATCH_CLZ32, MASK_CLZ32) +DECLARE_INSN(clz8, MATCH_CLZ8, MASK_CLZ8) +DECLARE_INSN(clzw, MATCH_CLZW, MASK_CLZW) +DECLARE_INSN(cm_jalt, MATCH_CM_JALT, MASK_CM_JALT) +DECLARE_INSN(cm_mva01s, MATCH_CM_MVA01S, MASK_CM_MVA01S) +DECLARE_INSN(cm_mvsa01, MATCH_CM_MVSA01, MASK_CM_MVSA01) +DECLARE_INSN(cm_pop, MATCH_CM_POP, MASK_CM_POP) +DECLARE_INSN(cm_popret, MATCH_CM_POPRET, MASK_CM_POPRET) +DECLARE_INSN(cm_popretz, MATCH_CM_POPRETZ, MASK_CM_POPRETZ) +DECLARE_INSN(cm_push, MATCH_CM_PUSH, MASK_CM_PUSH) +DECLARE_INSN(cmix, MATCH_CMIX, MASK_CMIX) +DECLARE_INSN(cmov, MATCH_CMOV, MASK_CMOV) +DECLARE_INSN(cmpeq16, MATCH_CMPEQ16, MASK_CMPEQ16) +DECLARE_INSN(cmpeq8, MATCH_CMPEQ8, MASK_CMPEQ8) +DECLARE_INSN(cpop, MATCH_CPOP, MASK_CPOP) +DECLARE_INSN(cpopw, MATCH_CPOPW, MASK_CPOPW) +DECLARE_INSN(cras16, MATCH_CRAS16, MASK_CRAS16) +DECLARE_INSN(cras32, MATCH_CRAS32, MASK_CRAS32) +DECLARE_INSN(crc32_b, MATCH_CRC32_B, MASK_CRC32_B) +DECLARE_INSN(crc32_d, MATCH_CRC32_D, MASK_CRC32_D) +DECLARE_INSN(crc32_h, MATCH_CRC32_H, MASK_CRC32_H) +DECLARE_INSN(crc32_w, MATCH_CRC32_W, MASK_CRC32_W) +DECLARE_INSN(crc32c_b, MATCH_CRC32C_B, MASK_CRC32C_B) +DECLARE_INSN(crc32c_d, MATCH_CRC32C_D, MASK_CRC32C_D) +DECLARE_INSN(crc32c_h, MATCH_CRC32C_H, MASK_CRC32C_H) +DECLARE_INSN(crc32c_w, MATCH_CRC32C_W, MASK_CRC32C_W) +DECLARE_INSN(crsa16, MATCH_CRSA16, MASK_CRSA16) +DECLARE_INSN(crsa32, MATCH_CRSA32, MASK_CRSA32) +DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) +DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) +DECLARE_INSN(csrrs, MATCH_CSRRS, MASK_CSRRS) +DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) 
+DECLARE_INSN(csrrw, MATCH_CSRRW, MASK_CSRRW) +DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) +DECLARE_INSN(ctz, MATCH_CTZ, MASK_CTZ) +DECLARE_INSN(ctzw, MATCH_CTZW, MASK_CTZW) +DECLARE_INSN(czero_eqz, MATCH_CZERO_EQZ, MASK_CZERO_EQZ) +DECLARE_INSN(czero_nez, MATCH_CZERO_NEZ, MASK_CZERO_NEZ) +DECLARE_INSN(div, MATCH_DIV, MASK_DIV) +DECLARE_INSN(divu, MATCH_DIVU, MASK_DIVU) +DECLARE_INSN(divuw, MATCH_DIVUW, MASK_DIVUW) +DECLARE_INSN(divw, MATCH_DIVW, MASK_DIVW) +DECLARE_INSN(dret, MATCH_DRET, MASK_DRET) +DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK) +DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL) +DECLARE_INSN(fadd_d, MATCH_FADD_D, MASK_FADD_D) +DECLARE_INSN(fadd_h, MATCH_FADD_H, MASK_FADD_H) +DECLARE_INSN(fadd_q, MATCH_FADD_Q, MASK_FADD_Q) +DECLARE_INSN(fadd_s, MATCH_FADD_S, MASK_FADD_S) +DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) +DECLARE_INSN(fclass_h, MATCH_FCLASS_H, MASK_FCLASS_H) +DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) +DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) +DECLARE_INSN(fcvt_d_h, MATCH_FCVT_D_H, MASK_FCVT_D_H) +DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) +DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) +DECLARE_INSN(fcvt_d_q, MATCH_FCVT_D_Q, MASK_FCVT_D_Q) +DECLARE_INSN(fcvt_d_s, MATCH_FCVT_D_S, MASK_FCVT_D_S) +DECLARE_INSN(fcvt_d_w, MATCH_FCVT_D_W, MASK_FCVT_D_W) +DECLARE_INSN(fcvt_d_wu, MATCH_FCVT_D_WU, MASK_FCVT_D_WU) +DECLARE_INSN(fcvt_h_d, MATCH_FCVT_H_D, MASK_FCVT_H_D) +DECLARE_INSN(fcvt_h_l, MATCH_FCVT_H_L, MASK_FCVT_H_L) +DECLARE_INSN(fcvt_h_lu, MATCH_FCVT_H_LU, MASK_FCVT_H_LU) +DECLARE_INSN(fcvt_h_q, MATCH_FCVT_H_Q, MASK_FCVT_H_Q) +DECLARE_INSN(fcvt_h_s, MATCH_FCVT_H_S, MASK_FCVT_H_S) +DECLARE_INSN(fcvt_h_w, MATCH_FCVT_H_W, MASK_FCVT_H_W) +DECLARE_INSN(fcvt_h_wu, MATCH_FCVT_H_WU, MASK_FCVT_H_WU) +DECLARE_INSN(fcvt_l_d, MATCH_FCVT_L_D, MASK_FCVT_L_D) +DECLARE_INSN(fcvt_l_h, MATCH_FCVT_L_H, MASK_FCVT_L_H) +DECLARE_INSN(fcvt_l_q, MATCH_FCVT_L_Q, MASK_FCVT_L_Q) +DECLARE_INSN(fcvt_l_s, 
MATCH_FCVT_L_S, MASK_FCVT_L_S) +DECLARE_INSN(fcvt_lu_d, MATCH_FCVT_LU_D, MASK_FCVT_LU_D) +DECLARE_INSN(fcvt_lu_h, MATCH_FCVT_LU_H, MASK_FCVT_LU_H) +DECLARE_INSN(fcvt_lu_q, MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q) +DECLARE_INSN(fcvt_lu_s, MATCH_FCVT_LU_S, MASK_FCVT_LU_S) +DECLARE_INSN(fcvt_q_d, MATCH_FCVT_Q_D, MASK_FCVT_Q_D) +DECLARE_INSN(fcvt_q_h, MATCH_FCVT_Q_H, MASK_FCVT_Q_H) +DECLARE_INSN(fcvt_q_l, MATCH_FCVT_Q_L, MASK_FCVT_Q_L) +DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) +DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) +DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) +DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) +DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) +DECLARE_INSN(fcvt_s_h, MATCH_FCVT_S_H, MASK_FCVT_S_H) +DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) +DECLARE_INSN(fcvt_s_lu, MATCH_FCVT_S_LU, MASK_FCVT_S_LU) +DECLARE_INSN(fcvt_s_q, MATCH_FCVT_S_Q, MASK_FCVT_S_Q) +DECLARE_INSN(fcvt_s_w, MATCH_FCVT_S_W, MASK_FCVT_S_W) +DECLARE_INSN(fcvt_s_wu, MATCH_FCVT_S_WU, MASK_FCVT_S_WU) +DECLARE_INSN(fcvt_w_d, MATCH_FCVT_W_D, MASK_FCVT_W_D) +DECLARE_INSN(fcvt_w_h, MATCH_FCVT_W_H, MASK_FCVT_W_H) +DECLARE_INSN(fcvt_w_q, MATCH_FCVT_W_Q, MASK_FCVT_W_Q) +DECLARE_INSN(fcvt_w_s, MATCH_FCVT_W_S, MASK_FCVT_W_S) +DECLARE_INSN(fcvt_wu_d, MATCH_FCVT_WU_D, MASK_FCVT_WU_D) +DECLARE_INSN(fcvt_wu_h, MATCH_FCVT_WU_H, MASK_FCVT_WU_H) +DECLARE_INSN(fcvt_wu_q, MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q) +DECLARE_INSN(fcvt_wu_s, MATCH_FCVT_WU_S, MASK_FCVT_WU_S) +DECLARE_INSN(fdiv_d, MATCH_FDIV_D, MASK_FDIV_D) +DECLARE_INSN(fdiv_h, MATCH_FDIV_H, MASK_FDIV_H) +DECLARE_INSN(fdiv_q, MATCH_FDIV_Q, MASK_FDIV_Q) +DECLARE_INSN(fdiv_s, MATCH_FDIV_S, MASK_FDIV_S) +DECLARE_INSN(fence, MATCH_FENCE, MASK_FENCE) +DECLARE_INSN(fence_i, MATCH_FENCE_I, MASK_FENCE_I) +DECLARE_INSN(feq_d, MATCH_FEQ_D, MASK_FEQ_D) +DECLARE_INSN(feq_h, MATCH_FEQ_H, MASK_FEQ_H) +DECLARE_INSN(feq_q, MATCH_FEQ_Q, MASK_FEQ_Q) +DECLARE_INSN(feq_s, MATCH_FEQ_S, MASK_FEQ_S) +DECLARE_INSN(fld, 
MATCH_FLD, MASK_FLD) +DECLARE_INSN(fle_d, MATCH_FLE_D, MASK_FLE_D) +DECLARE_INSN(fle_h, MATCH_FLE_H, MASK_FLE_H) +DECLARE_INSN(fle_q, MATCH_FLE_Q, MASK_FLE_Q) +DECLARE_INSN(fle_s, MATCH_FLE_S, MASK_FLE_S) +DECLARE_INSN(flh, MATCH_FLH, MASK_FLH) +DECLARE_INSN(flq, MATCH_FLQ, MASK_FLQ) +DECLARE_INSN(flt_d, MATCH_FLT_D, MASK_FLT_D) +DECLARE_INSN(flt_h, MATCH_FLT_H, MASK_FLT_H) +DECLARE_INSN(flt_q, MATCH_FLT_Q, MASK_FLT_Q) +DECLARE_INSN(flt_s, MATCH_FLT_S, MASK_FLT_S) +DECLARE_INSN(flw, MATCH_FLW, MASK_FLW) +DECLARE_INSN(fmadd_d, MATCH_FMADD_D, MASK_FMADD_D) +DECLARE_INSN(fmadd_h, MATCH_FMADD_H, MASK_FMADD_H) +DECLARE_INSN(fmadd_q, MATCH_FMADD_Q, MASK_FMADD_Q) +DECLARE_INSN(fmadd_s, MATCH_FMADD_S, MASK_FMADD_S) +DECLARE_INSN(fmax_d, MATCH_FMAX_D, MASK_FMAX_D) +DECLARE_INSN(fmax_h, MATCH_FMAX_H, MASK_FMAX_H) +DECLARE_INSN(fmax_q, MATCH_FMAX_Q, MASK_FMAX_Q) +DECLARE_INSN(fmax_s, MATCH_FMAX_S, MASK_FMAX_S) +DECLARE_INSN(fmin_d, MATCH_FMIN_D, MASK_FMIN_D) +DECLARE_INSN(fmin_h, MATCH_FMIN_H, MASK_FMIN_H) +DECLARE_INSN(fmin_q, MATCH_FMIN_Q, MASK_FMIN_Q) +DECLARE_INSN(fmin_s, MATCH_FMIN_S, MASK_FMIN_S) +DECLARE_INSN(fmsub_d, MATCH_FMSUB_D, MASK_FMSUB_D) +DECLARE_INSN(fmsub_h, MATCH_FMSUB_H, MASK_FMSUB_H) +DECLARE_INSN(fmsub_q, MATCH_FMSUB_Q, MASK_FMSUB_Q) +DECLARE_INSN(fmsub_s, MATCH_FMSUB_S, MASK_FMSUB_S) +DECLARE_INSN(fmul_d, MATCH_FMUL_D, MASK_FMUL_D) +DECLARE_INSN(fmul_h, MATCH_FMUL_H, MASK_FMUL_H) +DECLARE_INSN(fmul_q, MATCH_FMUL_Q, MASK_FMUL_Q) +DECLARE_INSN(fmul_s, MATCH_FMUL_S, MASK_FMUL_S) +DECLARE_INSN(fmv_d_x, MATCH_FMV_D_X, MASK_FMV_D_X) +DECLARE_INSN(fmv_h_x, MATCH_FMV_H_X, MASK_FMV_H_X) +DECLARE_INSN(fmv_w_x, MATCH_FMV_W_X, MASK_FMV_W_X) +DECLARE_INSN(fmv_x_d, MATCH_FMV_X_D, MASK_FMV_X_D) +DECLARE_INSN(fmv_x_h, MATCH_FMV_X_H, MASK_FMV_X_H) +DECLARE_INSN(fmv_x_w, MATCH_FMV_X_W, MASK_FMV_X_W) +DECLARE_INSN(fnmadd_d, MATCH_FNMADD_D, MASK_FNMADD_D) +DECLARE_INSN(fnmadd_h, MATCH_FNMADD_H, MASK_FNMADD_H) +DECLARE_INSN(fnmadd_q, MATCH_FNMADD_Q, MASK_FNMADD_Q) 
+DECLARE_INSN(fnmadd_s, MATCH_FNMADD_S, MASK_FNMADD_S) +DECLARE_INSN(fnmsub_d, MATCH_FNMSUB_D, MASK_FNMSUB_D) +DECLARE_INSN(fnmsub_h, MATCH_FNMSUB_H, MASK_FNMSUB_H) +DECLARE_INSN(fnmsub_q, MATCH_FNMSUB_Q, MASK_FNMSUB_Q) +DECLARE_INSN(fnmsub_s, MATCH_FNMSUB_S, MASK_FNMSUB_S) +DECLARE_INSN(fsd, MATCH_FSD, MASK_FSD) +DECLARE_INSN(fsgnj_d, MATCH_FSGNJ_D, MASK_FSGNJ_D) +DECLARE_INSN(fsgnj_h, MATCH_FSGNJ_H, MASK_FSGNJ_H) +DECLARE_INSN(fsgnj_q, MATCH_FSGNJ_Q, MASK_FSGNJ_Q) +DECLARE_INSN(fsgnj_s, MATCH_FSGNJ_S, MASK_FSGNJ_S) +DECLARE_INSN(fsgnjn_d, MATCH_FSGNJN_D, MASK_FSGNJN_D) +DECLARE_INSN(fsgnjn_h, MATCH_FSGNJN_H, MASK_FSGNJN_H) +DECLARE_INSN(fsgnjn_q, MATCH_FSGNJN_Q, MASK_FSGNJN_Q) +DECLARE_INSN(fsgnjn_s, MATCH_FSGNJN_S, MASK_FSGNJN_S) +DECLARE_INSN(fsgnjx_d, MATCH_FSGNJX_D, MASK_FSGNJX_D) +DECLARE_INSN(fsgnjx_h, MATCH_FSGNJX_H, MASK_FSGNJX_H) +DECLARE_INSN(fsgnjx_q, MATCH_FSGNJX_Q, MASK_FSGNJX_Q) +DECLARE_INSN(fsgnjx_s, MATCH_FSGNJX_S, MASK_FSGNJX_S) +DECLARE_INSN(fsh, MATCH_FSH, MASK_FSH) +DECLARE_INSN(fsl, MATCH_FSL, MASK_FSL) +DECLARE_INSN(fslw, MATCH_FSLW, MASK_FSLW) +DECLARE_INSN(fsq, MATCH_FSQ, MASK_FSQ) +DECLARE_INSN(fsqrt_d, MATCH_FSQRT_D, MASK_FSQRT_D) +DECLARE_INSN(fsqrt_h, MATCH_FSQRT_H, MASK_FSQRT_H) +DECLARE_INSN(fsqrt_q, MATCH_FSQRT_Q, MASK_FSQRT_Q) +DECLARE_INSN(fsqrt_s, MATCH_FSQRT_S, MASK_FSQRT_S) +DECLARE_INSN(fsr, MATCH_FSR, MASK_FSR) +DECLARE_INSN(fsri, MATCH_FSRI, MASK_FSRI) +DECLARE_INSN(fsriw, MATCH_FSRIW, MASK_FSRIW) +DECLARE_INSN(fsrw, MATCH_FSRW, MASK_FSRW) +DECLARE_INSN(fsub_d, MATCH_FSUB_D, MASK_FSUB_D) +DECLARE_INSN(fsub_h, MATCH_FSUB_H, MASK_FSUB_H) +DECLARE_INSN(fsub_q, MATCH_FSUB_Q, MASK_FSUB_Q) +DECLARE_INSN(fsub_s, MATCH_FSUB_S, MASK_FSUB_S) +DECLARE_INSN(fsw, MATCH_FSW, MASK_FSW) +DECLARE_INSN(gorc, MATCH_GORC, MASK_GORC) +DECLARE_INSN(gorci, MATCH_GORCI, MASK_GORCI) +DECLARE_INSN(gorciw, MATCH_GORCIW, MASK_GORCIW) +DECLARE_INSN(gorcw, MATCH_GORCW, MASK_GORCW) +DECLARE_INSN(grev, MATCH_GREV, MASK_GREV) +DECLARE_INSN(grevi, 
MATCH_GREVI, MASK_GREVI) +DECLARE_INSN(greviw, MATCH_GREVIW, MASK_GREVIW) +DECLARE_INSN(grevw, MATCH_GREVW, MASK_GREVW) +DECLARE_INSN(hfence_gvma, MATCH_HFENCE_GVMA, MASK_HFENCE_GVMA) +DECLARE_INSN(hfence_vvma, MATCH_HFENCE_VVMA, MASK_HFENCE_VVMA) +DECLARE_INSN(hinval_gvma, MATCH_HINVAL_GVMA, MASK_HINVAL_GVMA) +DECLARE_INSN(hinval_vvma, MATCH_HINVAL_VVMA, MASK_HINVAL_VVMA) +DECLARE_INSN(hlv_b, MATCH_HLV_B, MASK_HLV_B) +DECLARE_INSN(hlv_bu, MATCH_HLV_BU, MASK_HLV_BU) +DECLARE_INSN(hlv_d, MATCH_HLV_D, MASK_HLV_D) +DECLARE_INSN(hlv_h, MATCH_HLV_H, MASK_HLV_H) +DECLARE_INSN(hlv_hu, MATCH_HLV_HU, MASK_HLV_HU) +DECLARE_INSN(hlv_w, MATCH_HLV_W, MASK_HLV_W) +DECLARE_INSN(hlv_wu, MATCH_HLV_WU, MASK_HLV_WU) +DECLARE_INSN(hlvx_hu, MATCH_HLVX_HU, MASK_HLVX_HU) +DECLARE_INSN(hlvx_wu, MATCH_HLVX_WU, MASK_HLVX_WU) +DECLARE_INSN(hsv_b, MATCH_HSV_B, MASK_HSV_B) +DECLARE_INSN(hsv_d, MATCH_HSV_D, MASK_HSV_D) +DECLARE_INSN(hsv_h, MATCH_HSV_H, MASK_HSV_H) +DECLARE_INSN(hsv_w, MATCH_HSV_W, MASK_HSV_W) +DECLARE_INSN(insb, MATCH_INSB, MASK_INSB) +DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) +DECLARE_INSN(kabs16, MATCH_KABS16, MASK_KABS16) +DECLARE_INSN(kabs32, MATCH_KABS32, MASK_KABS32) +DECLARE_INSN(kabs8, MATCH_KABS8, MASK_KABS8) +DECLARE_INSN(kabsw, MATCH_KABSW, MASK_KABSW) +DECLARE_INSN(kadd16, MATCH_KADD16, MASK_KADD16) +DECLARE_INSN(kadd32, MATCH_KADD32, MASK_KADD32) +DECLARE_INSN(kadd64, MATCH_KADD64, MASK_KADD64) +DECLARE_INSN(kadd8, MATCH_KADD8, MASK_KADD8) +DECLARE_INSN(kaddh, MATCH_KADDH, MASK_KADDH) +DECLARE_INSN(kaddw, MATCH_KADDW, MASK_KADDW) +DECLARE_INSN(kcras16, MATCH_KCRAS16, MASK_KCRAS16) +DECLARE_INSN(kcras32, MATCH_KCRAS32, MASK_KCRAS32) +DECLARE_INSN(kcrsa16, MATCH_KCRSA16, MASK_KCRSA16) +DECLARE_INSN(kcrsa32, MATCH_KCRSA32, MASK_KCRSA32) +DECLARE_INSN(kdmabb, MATCH_KDMABB, MASK_KDMABB) +DECLARE_INSN(kdmabb16, MATCH_KDMABB16, MASK_KDMABB16) +DECLARE_INSN(kdmabt, MATCH_KDMABT, MASK_KDMABT) +DECLARE_INSN(kdmabt16, MATCH_KDMABT16, 
MASK_KDMABT16) +DECLARE_INSN(kdmatt, MATCH_KDMATT, MASK_KDMATT) +DECLARE_INSN(kdmatt16, MATCH_KDMATT16, MASK_KDMATT16) +DECLARE_INSN(kdmbb, MATCH_KDMBB, MASK_KDMBB) +DECLARE_INSN(kdmbb16, MATCH_KDMBB16, MASK_KDMBB16) +DECLARE_INSN(kdmbt, MATCH_KDMBT, MASK_KDMBT) +DECLARE_INSN(kdmbt16, MATCH_KDMBT16, MASK_KDMBT16) +DECLARE_INSN(kdmtt, MATCH_KDMTT, MASK_KDMTT) +DECLARE_INSN(kdmtt16, MATCH_KDMTT16, MASK_KDMTT16) +DECLARE_INSN(khm16, MATCH_KHM16, MASK_KHM16) +DECLARE_INSN(khm8, MATCH_KHM8, MASK_KHM8) +DECLARE_INSN(khmbb, MATCH_KHMBB, MASK_KHMBB) +DECLARE_INSN(khmbb16, MATCH_KHMBB16, MASK_KHMBB16) +DECLARE_INSN(khmbt, MATCH_KHMBT, MASK_KHMBT) +DECLARE_INSN(khmbt16, MATCH_KHMBT16, MASK_KHMBT16) +DECLARE_INSN(khmtt, MATCH_KHMTT, MASK_KHMTT) +DECLARE_INSN(khmtt16, MATCH_KHMTT16, MASK_KHMTT16) +DECLARE_INSN(khmx16, MATCH_KHMX16, MASK_KHMX16) +DECLARE_INSN(khmx8, MATCH_KHMX8, MASK_KHMX8) +DECLARE_INSN(kmabb, MATCH_KMABB, MASK_KMABB) +DECLARE_INSN(kmabb32, MATCH_KMABB32, MASK_KMABB32) +DECLARE_INSN(kmabt, MATCH_KMABT, MASK_KMABT) +DECLARE_INSN(kmabt32, MATCH_KMABT32, MASK_KMABT32) +DECLARE_INSN(kmada, MATCH_KMADA, MASK_KMADA) +DECLARE_INSN(kmadrs, MATCH_KMADRS, MASK_KMADRS) +DECLARE_INSN(kmadrs32, MATCH_KMADRS32, MASK_KMADRS32) +DECLARE_INSN(kmads, MATCH_KMADS, MASK_KMADS) +DECLARE_INSN(kmads32, MATCH_KMADS32, MASK_KMADS32) +DECLARE_INSN(kmar64, MATCH_KMAR64, MASK_KMAR64) +DECLARE_INSN(kmatt, MATCH_KMATT, MASK_KMATT) +DECLARE_INSN(kmatt32, MATCH_KMATT32, MASK_KMATT32) +DECLARE_INSN(kmaxda, MATCH_KMAXDA, MASK_KMAXDA) +DECLARE_INSN(kmaxda32, MATCH_KMAXDA32, MASK_KMAXDA32) +DECLARE_INSN(kmaxds, MATCH_KMAXDS, MASK_KMAXDS) +DECLARE_INSN(kmaxds32, MATCH_KMAXDS32, MASK_KMAXDS32) +DECLARE_INSN(kmda, MATCH_KMDA, MASK_KMDA) +DECLARE_INSN(kmda32, MATCH_KMDA32, MASK_KMDA32) +DECLARE_INSN(kmmac, MATCH_KMMAC, MASK_KMMAC) +DECLARE_INSN(kmmac_u, MATCH_KMMAC_U, MASK_KMMAC_U) +DECLARE_INSN(kmmawb, MATCH_KMMAWB, MASK_KMMAWB) +DECLARE_INSN(kmmawb2, MATCH_KMMAWB2, MASK_KMMAWB2) 
+DECLARE_INSN(kmmawb2_u, MATCH_KMMAWB2_U, MASK_KMMAWB2_U) +DECLARE_INSN(kmmawb_u, MATCH_KMMAWB_U, MASK_KMMAWB_U) +DECLARE_INSN(kmmawt, MATCH_KMMAWT, MASK_KMMAWT) +DECLARE_INSN(kmmawt2, MATCH_KMMAWT2, MASK_KMMAWT2) +DECLARE_INSN(kmmawt2_u, MATCH_KMMAWT2_U, MASK_KMMAWT2_U) +DECLARE_INSN(kmmawt_u, MATCH_KMMAWT_U, MASK_KMMAWT_U) +DECLARE_INSN(kmmsb, MATCH_KMMSB, MASK_KMMSB) +DECLARE_INSN(kmmsb_u, MATCH_KMMSB_U, MASK_KMMSB_U) +DECLARE_INSN(kmmwb2, MATCH_KMMWB2, MASK_KMMWB2) +DECLARE_INSN(kmmwb2_u, MATCH_KMMWB2_U, MASK_KMMWB2_U) +DECLARE_INSN(kmmwt2, MATCH_KMMWT2, MASK_KMMWT2) +DECLARE_INSN(kmmwt2_u, MATCH_KMMWT2_U, MASK_KMMWT2_U) +DECLARE_INSN(kmsda, MATCH_KMSDA, MASK_KMSDA) +DECLARE_INSN(kmsda32, MATCH_KMSDA32, MASK_KMSDA32) +DECLARE_INSN(kmsr64, MATCH_KMSR64, MASK_KMSR64) +DECLARE_INSN(kmsxda, MATCH_KMSXDA, MASK_KMSXDA) +DECLARE_INSN(kmsxda32, MATCH_KMSXDA32, MASK_KMSXDA32) +DECLARE_INSN(kmxda, MATCH_KMXDA, MASK_KMXDA) +DECLARE_INSN(kmxda32, MATCH_KMXDA32, MASK_KMXDA32) +DECLARE_INSN(ksll16, MATCH_KSLL16, MASK_KSLL16) +DECLARE_INSN(ksll32, MATCH_KSLL32, MASK_KSLL32) +DECLARE_INSN(ksll8, MATCH_KSLL8, MASK_KSLL8) +DECLARE_INSN(kslli16, MATCH_KSLLI16, MASK_KSLLI16) +DECLARE_INSN(kslli32, MATCH_KSLLI32, MASK_KSLLI32) +DECLARE_INSN(kslli8, MATCH_KSLLI8, MASK_KSLLI8) +DECLARE_INSN(kslliw, MATCH_KSLLIW, MASK_KSLLIW) +DECLARE_INSN(ksllw, MATCH_KSLLW, MASK_KSLLW) +DECLARE_INSN(kslra16, MATCH_KSLRA16, MASK_KSLRA16) +DECLARE_INSN(kslra16_u, MATCH_KSLRA16_U, MASK_KSLRA16_U) +DECLARE_INSN(kslra32, MATCH_KSLRA32, MASK_KSLRA32) +DECLARE_INSN(kslra32_u, MATCH_KSLRA32_U, MASK_KSLRA32_U) +DECLARE_INSN(kslra8, MATCH_KSLRA8, MASK_KSLRA8) +DECLARE_INSN(kslra8_u, MATCH_KSLRA8_U, MASK_KSLRA8_U) +DECLARE_INSN(kslraw, MATCH_KSLRAW, MASK_KSLRAW) +DECLARE_INSN(kslraw_u, MATCH_KSLRAW_U, MASK_KSLRAW_U) +DECLARE_INSN(kstas16, MATCH_KSTAS16, MASK_KSTAS16) +DECLARE_INSN(kstas32, MATCH_KSTAS32, MASK_KSTAS32) +DECLARE_INSN(kstsa16, MATCH_KSTSA16, MASK_KSTSA16) +DECLARE_INSN(kstsa32, MATCH_KSTSA32, 
MASK_KSTSA32) +DECLARE_INSN(ksub16, MATCH_KSUB16, MASK_KSUB16) +DECLARE_INSN(ksub32, MATCH_KSUB32, MASK_KSUB32) +DECLARE_INSN(ksub64, MATCH_KSUB64, MASK_KSUB64) +DECLARE_INSN(ksub8, MATCH_KSUB8, MASK_KSUB8) +DECLARE_INSN(ksubh, MATCH_KSUBH, MASK_KSUBH) +DECLARE_INSN(ksubw, MATCH_KSUBW, MASK_KSUBW) +DECLARE_INSN(kwmmul, MATCH_KWMMUL, MASK_KWMMUL) +DECLARE_INSN(kwmmul_u, MATCH_KWMMUL_U, MASK_KWMMUL_U) +DECLARE_INSN(lb, MATCH_LB, MASK_LB) +DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) +DECLARE_INSN(ld, MATCH_LD, MASK_LD) +DECLARE_INSN(lh, MATCH_LH, MASK_LH) +DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) +DECLARE_INSN(lr_d, MATCH_LR_D, MASK_LR_D) +DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) +DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) +DECLARE_INSN(lw, MATCH_LW, MASK_LW) +DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) +DECLARE_INSN(maddr32, MATCH_MADDR32, MASK_MADDR32) +DECLARE_INSN(max, MATCH_MAX, MASK_MAX) +DECLARE_INSN(maxu, MATCH_MAXU, MASK_MAXU) +DECLARE_INSN(min, MATCH_MIN, MASK_MIN) +DECLARE_INSN(minu, MATCH_MINU, MASK_MINU) +DECLARE_INSN(mnret, MATCH_MNRET, MASK_MNRET) +DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) +DECLARE_INSN(msubr32, MATCH_MSUBR32, MASK_MSUBR32) +DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) +DECLARE_INSN(mulh, MATCH_MULH, MASK_MULH) +DECLARE_INSN(mulhsu, MATCH_MULHSU, MASK_MULHSU) +DECLARE_INSN(mulhu, MATCH_MULHU, MASK_MULHU) +DECLARE_INSN(mulr64, MATCH_MULR64, MASK_MULR64) +DECLARE_INSN(mulsr64, MATCH_MULSR64, MASK_MULSR64) +DECLARE_INSN(mulw, MATCH_MULW, MASK_MULW) +DECLARE_INSN(or, MATCH_OR, MASK_OR) +DECLARE_INSN(ori, MATCH_ORI, MASK_ORI) +DECLARE_INSN(orn, MATCH_ORN, MASK_ORN) +DECLARE_INSN(pack, MATCH_PACK, MASK_PACK) +DECLARE_INSN(packh, MATCH_PACKH, MASK_PACKH) +DECLARE_INSN(packu, MATCH_PACKU, MASK_PACKU) +DECLARE_INSN(packuw, MATCH_PACKUW, MASK_PACKUW) +DECLARE_INSN(packw, MATCH_PACKW, MASK_PACKW) +DECLARE_INSN(pause, MATCH_PAUSE, MASK_PAUSE) +DECLARE_INSN(pbsad, MATCH_PBSAD, MASK_PBSAD) +DECLARE_INSN(pbsada, MATCH_PBSADA, MASK_PBSADA) +DECLARE_INSN(pkbb16, 
MATCH_PKBB16, MASK_PKBB16) +DECLARE_INSN(pkbt16, MATCH_PKBT16, MASK_PKBT16) +DECLARE_INSN(pkbt32, MATCH_PKBT32, MASK_PKBT32) +DECLARE_INSN(pktb16, MATCH_PKTB16, MASK_PKTB16) +DECLARE_INSN(pktb32, MATCH_PKTB32, MASK_PKTB32) +DECLARE_INSN(pktt16, MATCH_PKTT16, MASK_PKTT16) +DECLARE_INSN(prefetch_i, MATCH_PREFETCH_I, MASK_PREFETCH_I) +DECLARE_INSN(prefetch_r, MATCH_PREFETCH_R, MASK_PREFETCH_R) +DECLARE_INSN(prefetch_w, MATCH_PREFETCH_W, MASK_PREFETCH_W) +DECLARE_INSN(radd16, MATCH_RADD16, MASK_RADD16) +DECLARE_INSN(radd32, MATCH_RADD32, MASK_RADD32) +DECLARE_INSN(radd64, MATCH_RADD64, MASK_RADD64) +DECLARE_INSN(radd8, MATCH_RADD8, MASK_RADD8) +DECLARE_INSN(raddw, MATCH_RADDW, MASK_RADDW) +DECLARE_INSN(rcras16, MATCH_RCRAS16, MASK_RCRAS16) +DECLARE_INSN(rcras32, MATCH_RCRAS32, MASK_RCRAS32) +DECLARE_INSN(rcrsa16, MATCH_RCRSA16, MASK_RCRSA16) +DECLARE_INSN(rcrsa32, MATCH_RCRSA32, MASK_RCRSA32) +DECLARE_INSN(rem, MATCH_REM, MASK_REM) +DECLARE_INSN(remu, MATCH_REMU, MASK_REMU) +DECLARE_INSN(remuw, MATCH_REMUW, MASK_REMUW) +DECLARE_INSN(remw, MATCH_REMW, MASK_REMW) +DECLARE_INSN(rol, MATCH_ROL, MASK_ROL) +DECLARE_INSN(rolw, MATCH_ROLW, MASK_ROLW) +DECLARE_INSN(ror, MATCH_ROR, MASK_ROR) +DECLARE_INSN(rori, MATCH_RORI, MASK_RORI) +DECLARE_INSN(roriw, MATCH_RORIW, MASK_RORIW) +DECLARE_INSN(rorw, MATCH_RORW, MASK_RORW) +DECLARE_INSN(rstas16, MATCH_RSTAS16, MASK_RSTAS16) +DECLARE_INSN(rstas32, MATCH_RSTAS32, MASK_RSTAS32) +DECLARE_INSN(rstsa16, MATCH_RSTSA16, MASK_RSTSA16) +DECLARE_INSN(rstsa32, MATCH_RSTSA32, MASK_RSTSA32) +DECLARE_INSN(rsub16, MATCH_RSUB16, MASK_RSUB16) +DECLARE_INSN(rsub32, MATCH_RSUB32, MASK_RSUB32) +DECLARE_INSN(rsub64, MATCH_RSUB64, MASK_RSUB64) +DECLARE_INSN(rsub8, MATCH_RSUB8, MASK_RSUB8) +DECLARE_INSN(rsubw, MATCH_RSUBW, MASK_RSUBW) +DECLARE_INSN(sb, MATCH_SB, MASK_SB) +DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) +DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) +DECLARE_INSN(sclip16, MATCH_SCLIP16, MASK_SCLIP16) +DECLARE_INSN(sclip32, MATCH_SCLIP32, 
MASK_SCLIP32) +DECLARE_INSN(sclip8, MATCH_SCLIP8, MASK_SCLIP8) +DECLARE_INSN(scmple16, MATCH_SCMPLE16, MASK_SCMPLE16) +DECLARE_INSN(scmple8, MATCH_SCMPLE8, MASK_SCMPLE8) +DECLARE_INSN(scmplt16, MATCH_SCMPLT16, MASK_SCMPLT16) +DECLARE_INSN(scmplt8, MATCH_SCMPLT8, MASK_SCMPLT8) +DECLARE_INSN(sd, MATCH_SD, MASK_SD) +DECLARE_INSN(sext_b, MATCH_SEXT_B, MASK_SEXT_B) +DECLARE_INSN(sext_h, MATCH_SEXT_H, MASK_SEXT_H) +DECLARE_INSN(sfence_inval_ir, MATCH_SFENCE_INVAL_IR, MASK_SFENCE_INVAL_IR) +DECLARE_INSN(sfence_vma, MATCH_SFENCE_VMA, MASK_SFENCE_VMA) +DECLARE_INSN(sfence_w_inval, MATCH_SFENCE_W_INVAL, MASK_SFENCE_W_INVAL) +DECLARE_INSN(sh, MATCH_SH, MASK_SH) +DECLARE_INSN(sh1add, MATCH_SH1ADD, MASK_SH1ADD) +DECLARE_INSN(sh1add_uw, MATCH_SH1ADD_UW, MASK_SH1ADD_UW) +DECLARE_INSN(sh2add, MATCH_SH2ADD, MASK_SH2ADD) +DECLARE_INSN(sh2add_uw, MATCH_SH2ADD_UW, MASK_SH2ADD_UW) +DECLARE_INSN(sh3add, MATCH_SH3ADD, MASK_SH3ADD) +DECLARE_INSN(sh3add_uw, MATCH_SH3ADD_UW, MASK_SH3ADD_UW) +DECLARE_INSN(sha256sig0, MATCH_SHA256SIG0, MASK_SHA256SIG0) +DECLARE_INSN(sha256sig1, MATCH_SHA256SIG1, MASK_SHA256SIG1) +DECLARE_INSN(sha256sum0, MATCH_SHA256SUM0, MASK_SHA256SUM0) +DECLARE_INSN(sha256sum1, MATCH_SHA256SUM1, MASK_SHA256SUM1) +DECLARE_INSN(sha512sig0, MATCH_SHA512SIG0, MASK_SHA512SIG0) +DECLARE_INSN(sha512sig0h, MATCH_SHA512SIG0H, MASK_SHA512SIG0H) +DECLARE_INSN(sha512sig0l, MATCH_SHA512SIG0L, MASK_SHA512SIG0L) +DECLARE_INSN(sha512sig1, MATCH_SHA512SIG1, MASK_SHA512SIG1) +DECLARE_INSN(sha512sig1h, MATCH_SHA512SIG1H, MASK_SHA512SIG1H) +DECLARE_INSN(sha512sig1l, MATCH_SHA512SIG1L, MASK_SHA512SIG1L) +DECLARE_INSN(sha512sum0, MATCH_SHA512SUM0, MASK_SHA512SUM0) +DECLARE_INSN(sha512sum0r, MATCH_SHA512SUM0R, MASK_SHA512SUM0R) +DECLARE_INSN(sha512sum1, MATCH_SHA512SUM1, MASK_SHA512SUM1) +DECLARE_INSN(sha512sum1r, MATCH_SHA512SUM1R, MASK_SHA512SUM1R) +DECLARE_INSN(shfl, MATCH_SHFL, MASK_SHFL) +DECLARE_INSN(shfli, MATCH_SHFLI, MASK_SHFLI) +DECLARE_INSN(shflw, MATCH_SHFLW, MASK_SHFLW) 
+DECLARE_INSN(sinval_vma, MATCH_SINVAL_VMA, MASK_SINVAL_VMA) +DECLARE_INSN(sll, MATCH_SLL, MASK_SLL) +DECLARE_INSN(sll16, MATCH_SLL16, MASK_SLL16) +DECLARE_INSN(sll32, MATCH_SLL32, MASK_SLL32) +DECLARE_INSN(sll8, MATCH_SLL8, MASK_SLL8) +DECLARE_INSN(slli, MATCH_SLLI, MASK_SLLI) +DECLARE_INSN(slli16, MATCH_SLLI16, MASK_SLLI16) +DECLARE_INSN(slli32, MATCH_SLLI32, MASK_SLLI32) +DECLARE_INSN(slli8, MATCH_SLLI8, MASK_SLLI8) +DECLARE_INSN(slli_rv32, MATCH_SLLI_RV32, MASK_SLLI_RV32) +DECLARE_INSN(slli_uw, MATCH_SLLI_UW, MASK_SLLI_UW) +DECLARE_INSN(slliw, MATCH_SLLIW, MASK_SLLIW) +DECLARE_INSN(sllw, MATCH_SLLW, MASK_SLLW) +DECLARE_INSN(slo, MATCH_SLO, MASK_SLO) +DECLARE_INSN(sloi, MATCH_SLOI, MASK_SLOI) +DECLARE_INSN(sloiw, MATCH_SLOIW, MASK_SLOIW) +DECLARE_INSN(slow, MATCH_SLOW, MASK_SLOW) +DECLARE_INSN(slt, MATCH_SLT, MASK_SLT) +DECLARE_INSN(slti, MATCH_SLTI, MASK_SLTI) +DECLARE_INSN(sltiu, MATCH_SLTIU, MASK_SLTIU) +DECLARE_INSN(sltu, MATCH_SLTU, MASK_SLTU) +DECLARE_INSN(sm3p0, MATCH_SM3P0, MASK_SM3P0) +DECLARE_INSN(sm3p1, MATCH_SM3P1, MASK_SM3P1) +DECLARE_INSN(sm4ed, MATCH_SM4ED, MASK_SM4ED) +DECLARE_INSN(sm4ks, MATCH_SM4KS, MASK_SM4KS) +DECLARE_INSN(smal, MATCH_SMAL, MASK_SMAL) +DECLARE_INSN(smalbb, MATCH_SMALBB, MASK_SMALBB) +DECLARE_INSN(smalbt, MATCH_SMALBT, MASK_SMALBT) +DECLARE_INSN(smalda, MATCH_SMALDA, MASK_SMALDA) +DECLARE_INSN(smaldrs, MATCH_SMALDRS, MASK_SMALDRS) +DECLARE_INSN(smalds, MATCH_SMALDS, MASK_SMALDS) +DECLARE_INSN(smaltt, MATCH_SMALTT, MASK_SMALTT) +DECLARE_INSN(smalxda, MATCH_SMALXDA, MASK_SMALXDA) +DECLARE_INSN(smalxds, MATCH_SMALXDS, MASK_SMALXDS) +DECLARE_INSN(smaqa, MATCH_SMAQA, MASK_SMAQA) +DECLARE_INSN(smaqa_su, MATCH_SMAQA_SU, MASK_SMAQA_SU) +DECLARE_INSN(smar64, MATCH_SMAR64, MASK_SMAR64) +DECLARE_INSN(smax16, MATCH_SMAX16, MASK_SMAX16) +DECLARE_INSN(smax32, MATCH_SMAX32, MASK_SMAX32) +DECLARE_INSN(smax8, MATCH_SMAX8, MASK_SMAX8) +DECLARE_INSN(smbb16, MATCH_SMBB16, MASK_SMBB16) +DECLARE_INSN(smbt16, MATCH_SMBT16, MASK_SMBT16) 
+DECLARE_INSN(smbt32, MATCH_SMBT32, MASK_SMBT32) +DECLARE_INSN(smdrs, MATCH_SMDRS, MASK_SMDRS) +DECLARE_INSN(smdrs32, MATCH_SMDRS32, MASK_SMDRS32) +DECLARE_INSN(smds, MATCH_SMDS, MASK_SMDS) +DECLARE_INSN(smds32, MATCH_SMDS32, MASK_SMDS32) +DECLARE_INSN(smin16, MATCH_SMIN16, MASK_SMIN16) +DECLARE_INSN(smin32, MATCH_SMIN32, MASK_SMIN32) +DECLARE_INSN(smin8, MATCH_SMIN8, MASK_SMIN8) +DECLARE_INSN(smmul, MATCH_SMMUL, MASK_SMMUL) +DECLARE_INSN(smmul_u, MATCH_SMMUL_U, MASK_SMMUL_U) +DECLARE_INSN(smmwb, MATCH_SMMWB, MASK_SMMWB) +DECLARE_INSN(smmwb_u, MATCH_SMMWB_U, MASK_SMMWB_U) +DECLARE_INSN(smmwt, MATCH_SMMWT, MASK_SMMWT) +DECLARE_INSN(smmwt_u, MATCH_SMMWT_U, MASK_SMMWT_U) +DECLARE_INSN(smslda, MATCH_SMSLDA, MASK_SMSLDA) +DECLARE_INSN(smslxda, MATCH_SMSLXDA, MASK_SMSLXDA) +DECLARE_INSN(smsr64, MATCH_SMSR64, MASK_SMSR64) +DECLARE_INSN(smtt16, MATCH_SMTT16, MASK_SMTT16) +DECLARE_INSN(smtt32, MATCH_SMTT32, MASK_SMTT32) +DECLARE_INSN(smul16, MATCH_SMUL16, MASK_SMUL16) +DECLARE_INSN(smul8, MATCH_SMUL8, MASK_SMUL8) +DECLARE_INSN(smulx16, MATCH_SMULX16, MASK_SMULX16) +DECLARE_INSN(smulx8, MATCH_SMULX8, MASK_SMULX8) +DECLARE_INSN(smxds, MATCH_SMXDS, MASK_SMXDS) +DECLARE_INSN(smxds32, MATCH_SMXDS32, MASK_SMXDS32) +DECLARE_INSN(sra, MATCH_SRA, MASK_SRA) +DECLARE_INSN(sra16, MATCH_SRA16, MASK_SRA16) +DECLARE_INSN(sra16_u, MATCH_SRA16_U, MASK_SRA16_U) +DECLARE_INSN(sra32, MATCH_SRA32, MASK_SRA32) +DECLARE_INSN(sra32_u, MATCH_SRA32_U, MASK_SRA32_U) +DECLARE_INSN(sra8, MATCH_SRA8, MASK_SRA8) +DECLARE_INSN(sra8_u, MATCH_SRA8_U, MASK_SRA8_U) +DECLARE_INSN(sra_u, MATCH_SRA_U, MASK_SRA_U) +DECLARE_INSN(srai, MATCH_SRAI, MASK_SRAI) +DECLARE_INSN(srai16, MATCH_SRAI16, MASK_SRAI16) +DECLARE_INSN(srai16_u, MATCH_SRAI16_U, MASK_SRAI16_U) +DECLARE_INSN(srai32, MATCH_SRAI32, MASK_SRAI32) +DECLARE_INSN(srai32_u, MATCH_SRAI32_U, MASK_SRAI32_U) +DECLARE_INSN(srai8, MATCH_SRAI8, MASK_SRAI8) +DECLARE_INSN(srai8_u, MATCH_SRAI8_U, MASK_SRAI8_U) +DECLARE_INSN(srai_rv32, MATCH_SRAI_RV32, MASK_SRAI_RV32) 
+DECLARE_INSN(srai_u, MATCH_SRAI_U, MASK_SRAI_U) +DECLARE_INSN(sraiw, MATCH_SRAIW, MASK_SRAIW) +DECLARE_INSN(sraiw_u, MATCH_SRAIW_U, MASK_SRAIW_U) +DECLARE_INSN(sraw, MATCH_SRAW, MASK_SRAW) +DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) +DECLARE_INSN(srl, MATCH_SRL, MASK_SRL) +DECLARE_INSN(srl16, MATCH_SRL16, MASK_SRL16) +DECLARE_INSN(srl16_u, MATCH_SRL16_U, MASK_SRL16_U) +DECLARE_INSN(srl32, MATCH_SRL32, MASK_SRL32) +DECLARE_INSN(srl32_u, MATCH_SRL32_U, MASK_SRL32_U) +DECLARE_INSN(srl8, MATCH_SRL8, MASK_SRL8) +DECLARE_INSN(srl8_u, MATCH_SRL8_U, MASK_SRL8_U) +DECLARE_INSN(srli, MATCH_SRLI, MASK_SRLI) +DECLARE_INSN(srli16, MATCH_SRLI16, MASK_SRLI16) +DECLARE_INSN(srli16_u, MATCH_SRLI16_U, MASK_SRLI16_U) +DECLARE_INSN(srli32, MATCH_SRLI32, MASK_SRLI32) +DECLARE_INSN(srli32_u, MATCH_SRLI32_U, MASK_SRLI32_U) +DECLARE_INSN(srli8, MATCH_SRLI8, MASK_SRLI8) +DECLARE_INSN(srli8_u, MATCH_SRLI8_U, MASK_SRLI8_U) +DECLARE_INSN(srli_rv32, MATCH_SRLI_RV32, MASK_SRLI_RV32) +DECLARE_INSN(srliw, MATCH_SRLIW, MASK_SRLIW) +DECLARE_INSN(srlw, MATCH_SRLW, MASK_SRLW) +DECLARE_INSN(sro, MATCH_SRO, MASK_SRO) +DECLARE_INSN(sroi, MATCH_SROI, MASK_SROI) +DECLARE_INSN(sroiw, MATCH_SROIW, MASK_SROIW) +DECLARE_INSN(srow, MATCH_SROW, MASK_SROW) +DECLARE_INSN(stas16, MATCH_STAS16, MASK_STAS16) +DECLARE_INSN(stas32, MATCH_STAS32, MASK_STAS32) +DECLARE_INSN(stsa16, MATCH_STSA16, MASK_STSA16) +DECLARE_INSN(stsa32, MATCH_STSA32, MASK_STSA32) +DECLARE_INSN(sub, MATCH_SUB, MASK_SUB) +DECLARE_INSN(sub16, MATCH_SUB16, MASK_SUB16) +DECLARE_INSN(sub32, MATCH_SUB32, MASK_SUB32) +DECLARE_INSN(sub64, MATCH_SUB64, MASK_SUB64) +DECLARE_INSN(sub8, MATCH_SUB8, MASK_SUB8) +DECLARE_INSN(subw, MATCH_SUBW, MASK_SUBW) +DECLARE_INSN(sunpkd810, MATCH_SUNPKD810, MASK_SUNPKD810) +DECLARE_INSN(sunpkd820, MATCH_SUNPKD820, MASK_SUNPKD820) +DECLARE_INSN(sunpkd830, MATCH_SUNPKD830, MASK_SUNPKD830) +DECLARE_INSN(sunpkd831, MATCH_SUNPKD831, MASK_SUNPKD831) +DECLARE_INSN(sunpkd832, MATCH_SUNPKD832, MASK_SUNPKD832) +DECLARE_INSN(sw, 
MATCH_SW, MASK_SW) +DECLARE_INSN(uclip16, MATCH_UCLIP16, MASK_UCLIP16) +DECLARE_INSN(uclip32, MATCH_UCLIP32, MASK_UCLIP32) +DECLARE_INSN(uclip8, MATCH_UCLIP8, MASK_UCLIP8) +DECLARE_INSN(ucmple16, MATCH_UCMPLE16, MASK_UCMPLE16) +DECLARE_INSN(ucmple8, MATCH_UCMPLE8, MASK_UCMPLE8) +DECLARE_INSN(ucmplt16, MATCH_UCMPLT16, MASK_UCMPLT16) +DECLARE_INSN(ucmplt8, MATCH_UCMPLT8, MASK_UCMPLT8) +DECLARE_INSN(ukadd16, MATCH_UKADD16, MASK_UKADD16) +DECLARE_INSN(ukadd32, MATCH_UKADD32, MASK_UKADD32) +DECLARE_INSN(ukadd64, MATCH_UKADD64, MASK_UKADD64) +DECLARE_INSN(ukadd8, MATCH_UKADD8, MASK_UKADD8) +DECLARE_INSN(ukaddh, MATCH_UKADDH, MASK_UKADDH) +DECLARE_INSN(ukaddw, MATCH_UKADDW, MASK_UKADDW) +DECLARE_INSN(ukcras16, MATCH_UKCRAS16, MASK_UKCRAS16) +DECLARE_INSN(ukcras32, MATCH_UKCRAS32, MASK_UKCRAS32) +DECLARE_INSN(ukcrsa16, MATCH_UKCRSA16, MASK_UKCRSA16) +DECLARE_INSN(ukcrsa32, MATCH_UKCRSA32, MASK_UKCRSA32) +DECLARE_INSN(ukmar64, MATCH_UKMAR64, MASK_UKMAR64) +DECLARE_INSN(ukmsr64, MATCH_UKMSR64, MASK_UKMSR64) +DECLARE_INSN(ukstas16, MATCH_UKSTAS16, MASK_UKSTAS16) +DECLARE_INSN(ukstas32, MATCH_UKSTAS32, MASK_UKSTAS32) +DECLARE_INSN(ukstsa16, MATCH_UKSTSA16, MASK_UKSTSA16) +DECLARE_INSN(ukstsa32, MATCH_UKSTSA32, MASK_UKSTSA32) +DECLARE_INSN(uksub16, MATCH_UKSUB16, MASK_UKSUB16) +DECLARE_INSN(uksub32, MATCH_UKSUB32, MASK_UKSUB32) +DECLARE_INSN(uksub64, MATCH_UKSUB64, MASK_UKSUB64) +DECLARE_INSN(uksub8, MATCH_UKSUB8, MASK_UKSUB8) +DECLARE_INSN(uksubh, MATCH_UKSUBH, MASK_UKSUBH) +DECLARE_INSN(uksubw, MATCH_UKSUBW, MASK_UKSUBW) +DECLARE_INSN(umaqa, MATCH_UMAQA, MASK_UMAQA) +DECLARE_INSN(umar64, MATCH_UMAR64, MASK_UMAR64) +DECLARE_INSN(umax16, MATCH_UMAX16, MASK_UMAX16) +DECLARE_INSN(umax32, MATCH_UMAX32, MASK_UMAX32) +DECLARE_INSN(umax8, MATCH_UMAX8, MASK_UMAX8) +DECLARE_INSN(umin16, MATCH_UMIN16, MASK_UMIN16) +DECLARE_INSN(umin32, MATCH_UMIN32, MASK_UMIN32) +DECLARE_INSN(umin8, MATCH_UMIN8, MASK_UMIN8) +DECLARE_INSN(umsr64, MATCH_UMSR64, MASK_UMSR64) +DECLARE_INSN(umul16, 
MATCH_UMUL16, MASK_UMUL16) +DECLARE_INSN(umul8, MATCH_UMUL8, MASK_UMUL8) +DECLARE_INSN(umulx16, MATCH_UMULX16, MASK_UMULX16) +DECLARE_INSN(umulx8, MATCH_UMULX8, MASK_UMULX8) +DECLARE_INSN(unshfl, MATCH_UNSHFL, MASK_UNSHFL) +DECLARE_INSN(unshfli, MATCH_UNSHFLI, MASK_UNSHFLI) +DECLARE_INSN(unshflw, MATCH_UNSHFLW, MASK_UNSHFLW) +DECLARE_INSN(uradd16, MATCH_URADD16, MASK_URADD16) +DECLARE_INSN(uradd32, MATCH_URADD32, MASK_URADD32) +DECLARE_INSN(uradd64, MATCH_URADD64, MASK_URADD64) +DECLARE_INSN(uradd8, MATCH_URADD8, MASK_URADD8) +DECLARE_INSN(uraddw, MATCH_URADDW, MASK_URADDW) +DECLARE_INSN(urcras16, MATCH_URCRAS16, MASK_URCRAS16) +DECLARE_INSN(urcras32, MATCH_URCRAS32, MASK_URCRAS32) +DECLARE_INSN(urcrsa16, MATCH_URCRSA16, MASK_URCRSA16) +DECLARE_INSN(urcrsa32, MATCH_URCRSA32, MASK_URCRSA32) +DECLARE_INSN(urstas16, MATCH_URSTAS16, MASK_URSTAS16) +DECLARE_INSN(urstas32, MATCH_URSTAS32, MASK_URSTAS32) +DECLARE_INSN(urstsa16, MATCH_URSTSA16, MASK_URSTSA16) +DECLARE_INSN(urstsa32, MATCH_URSTSA32, MASK_URSTSA32) +DECLARE_INSN(ursub16, MATCH_URSUB16, MASK_URSUB16) +DECLARE_INSN(ursub32, MATCH_URSUB32, MASK_URSUB32) +DECLARE_INSN(ursub64, MATCH_URSUB64, MASK_URSUB64) +DECLARE_INSN(ursub8, MATCH_URSUB8, MASK_URSUB8) +DECLARE_INSN(ursubw, MATCH_URSUBW, MASK_URSUBW) +DECLARE_INSN(vaadd_vv, MATCH_VAADD_VV, MASK_VAADD_VV) +DECLARE_INSN(vaadd_vx, MATCH_VAADD_VX, MASK_VAADD_VX) +DECLARE_INSN(vaaddu_vv, MATCH_VAADDU_VV, MASK_VAADDU_VV) +DECLARE_INSN(vaaddu_vx, MATCH_VAADDU_VX, MASK_VAADDU_VX) +DECLARE_INSN(vadc_vim, MATCH_VADC_VIM, MASK_VADC_VIM) +DECLARE_INSN(vadc_vvm, MATCH_VADC_VVM, MASK_VADC_VVM) +DECLARE_INSN(vadc_vxm, MATCH_VADC_VXM, MASK_VADC_VXM) +DECLARE_INSN(vadd_vi, MATCH_VADD_VI, MASK_VADD_VI) +DECLARE_INSN(vadd_vv, MATCH_VADD_VV, MASK_VADD_VV) +DECLARE_INSN(vadd_vx, MATCH_VADD_VX, MASK_VADD_VX) +DECLARE_INSN(vamoaddei16_v, MATCH_VAMOADDEI16_V, MASK_VAMOADDEI16_V) +DECLARE_INSN(vamoaddei32_v, MATCH_VAMOADDEI32_V, MASK_VAMOADDEI32_V) +DECLARE_INSN(vamoaddei64_v, 
MATCH_VAMOADDEI64_V, MASK_VAMOADDEI64_V) +DECLARE_INSN(vamoaddei8_v, MATCH_VAMOADDEI8_V, MASK_VAMOADDEI8_V) +DECLARE_INSN(vamoandei16_v, MATCH_VAMOANDEI16_V, MASK_VAMOANDEI16_V) +DECLARE_INSN(vamoandei32_v, MATCH_VAMOANDEI32_V, MASK_VAMOANDEI32_V) +DECLARE_INSN(vamoandei64_v, MATCH_VAMOANDEI64_V, MASK_VAMOANDEI64_V) +DECLARE_INSN(vamoandei8_v, MATCH_VAMOANDEI8_V, MASK_VAMOANDEI8_V) +DECLARE_INSN(vamomaxei16_v, MATCH_VAMOMAXEI16_V, MASK_VAMOMAXEI16_V) +DECLARE_INSN(vamomaxei32_v, MATCH_VAMOMAXEI32_V, MASK_VAMOMAXEI32_V) +DECLARE_INSN(vamomaxei64_v, MATCH_VAMOMAXEI64_V, MASK_VAMOMAXEI64_V) +DECLARE_INSN(vamomaxei8_v, MATCH_VAMOMAXEI8_V, MASK_VAMOMAXEI8_V) +DECLARE_INSN(vamomaxuei16_v, MATCH_VAMOMAXUEI16_V, MASK_VAMOMAXUEI16_V) +DECLARE_INSN(vamomaxuei32_v, MATCH_VAMOMAXUEI32_V, MASK_VAMOMAXUEI32_V) +DECLARE_INSN(vamomaxuei64_v, MATCH_VAMOMAXUEI64_V, MASK_VAMOMAXUEI64_V) +DECLARE_INSN(vamomaxuei8_v, MATCH_VAMOMAXUEI8_V, MASK_VAMOMAXUEI8_V) +DECLARE_INSN(vamominei16_v, MATCH_VAMOMINEI16_V, MASK_VAMOMINEI16_V) +DECLARE_INSN(vamominei32_v, MATCH_VAMOMINEI32_V, MASK_VAMOMINEI32_V) +DECLARE_INSN(vamominei64_v, MATCH_VAMOMINEI64_V, MASK_VAMOMINEI64_V) +DECLARE_INSN(vamominei8_v, MATCH_VAMOMINEI8_V, MASK_VAMOMINEI8_V) +DECLARE_INSN(vamominuei16_v, MATCH_VAMOMINUEI16_V, MASK_VAMOMINUEI16_V) +DECLARE_INSN(vamominuei32_v, MATCH_VAMOMINUEI32_V, MASK_VAMOMINUEI32_V) +DECLARE_INSN(vamominuei64_v, MATCH_VAMOMINUEI64_V, MASK_VAMOMINUEI64_V) +DECLARE_INSN(vamominuei8_v, MATCH_VAMOMINUEI8_V, MASK_VAMOMINUEI8_V) +DECLARE_INSN(vamoorei16_v, MATCH_VAMOOREI16_V, MASK_VAMOOREI16_V) +DECLARE_INSN(vamoorei32_v, MATCH_VAMOOREI32_V, MASK_VAMOOREI32_V) +DECLARE_INSN(vamoorei64_v, MATCH_VAMOOREI64_V, MASK_VAMOOREI64_V) +DECLARE_INSN(vamoorei8_v, MATCH_VAMOOREI8_V, MASK_VAMOOREI8_V) +DECLARE_INSN(vamoswapei16_v, MATCH_VAMOSWAPEI16_V, MASK_VAMOSWAPEI16_V) +DECLARE_INSN(vamoswapei32_v, MATCH_VAMOSWAPEI32_V, MASK_VAMOSWAPEI32_V) +DECLARE_INSN(vamoswapei64_v, MATCH_VAMOSWAPEI64_V, 
MASK_VAMOSWAPEI64_V) +DECLARE_INSN(vamoswapei8_v, MATCH_VAMOSWAPEI8_V, MASK_VAMOSWAPEI8_V) +DECLARE_INSN(vamoxorei16_v, MATCH_VAMOXOREI16_V, MASK_VAMOXOREI16_V) +DECLARE_INSN(vamoxorei32_v, MATCH_VAMOXOREI32_V, MASK_VAMOXOREI32_V) +DECLARE_INSN(vamoxorei64_v, MATCH_VAMOXOREI64_V, MASK_VAMOXOREI64_V) +DECLARE_INSN(vamoxorei8_v, MATCH_VAMOXOREI8_V, MASK_VAMOXOREI8_V) +DECLARE_INSN(vand_vi, MATCH_VAND_VI, MASK_VAND_VI) +DECLARE_INSN(vand_vv, MATCH_VAND_VV, MASK_VAND_VV) +DECLARE_INSN(vand_vx, MATCH_VAND_VX, MASK_VAND_VX) +DECLARE_INSN(vasub_vv, MATCH_VASUB_VV, MASK_VASUB_VV) +DECLARE_INSN(vasub_vx, MATCH_VASUB_VX, MASK_VASUB_VX) +DECLARE_INSN(vasubu_vv, MATCH_VASUBU_VV, MASK_VASUBU_VV) +DECLARE_INSN(vasubu_vx, MATCH_VASUBU_VX, MASK_VASUBU_VX) +DECLARE_INSN(vcompress_vm, MATCH_VCOMPRESS_VM, MASK_VCOMPRESS_VM) +DECLARE_INSN(vcpop_m, MATCH_VCPOP_M, MASK_VCPOP_M) +DECLARE_INSN(vdiv_vv, MATCH_VDIV_VV, MASK_VDIV_VV) +DECLARE_INSN(vdiv_vx, MATCH_VDIV_VX, MASK_VDIV_VX) +DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) +DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) +DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) +DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) +DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) +DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) +DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) +DECLARE_INSN(vfcvt_rtz_x_f_v, MATCH_VFCVT_RTZ_X_F_V, MASK_VFCVT_RTZ_X_F_V) +DECLARE_INSN(vfcvt_rtz_xu_f_v, MATCH_VFCVT_RTZ_XU_F_V, MASK_VFCVT_RTZ_XU_F_V) +DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) +DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) +DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF) +DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) +DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M) +DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF) +DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV) +DECLARE_INSN(vfmadd_vf, MATCH_VFMADD_VF, 
MASK_VFMADD_VF) +DECLARE_INSN(vfmadd_vv, MATCH_VFMADD_VV, MASK_VFMADD_VV) +DECLARE_INSN(vfmax_vf, MATCH_VFMAX_VF, MASK_VFMAX_VF) +DECLARE_INSN(vfmax_vv, MATCH_VFMAX_VV, MASK_VFMAX_VV) +DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM) +DECLARE_INSN(vfmin_vf, MATCH_VFMIN_VF, MASK_VFMIN_VF) +DECLARE_INSN(vfmin_vv, MATCH_VFMIN_VV, MASK_VFMIN_VV) +DECLARE_INSN(vfmsac_vf, MATCH_VFMSAC_VF, MASK_VFMSAC_VF) +DECLARE_INSN(vfmsac_vv, MATCH_VFMSAC_VV, MASK_VFMSAC_VV) +DECLARE_INSN(vfmsub_vf, MATCH_VFMSUB_VF, MASK_VFMSUB_VF) +DECLARE_INSN(vfmsub_vv, MATCH_VFMSUB_VV, MASK_VFMSUB_VV) +DECLARE_INSN(vfmul_vf, MATCH_VFMUL_VF, MASK_VFMUL_VF) +DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV) +DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) +DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F) +DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) +DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W) +DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W) +DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W) +DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W) +DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) +DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) +DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) +DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) +DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) +DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) +DECLARE_INSN(vfnmadd_vv, MATCH_VFNMADD_VV, MASK_VFNMADD_VV) +DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF) +DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) +DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF) +DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV) +DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF) 
+DECLARE_INSN(vfrec7_v, MATCH_VFREC7_V, MASK_VFREC7_V) +DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS) +DECLARE_INSN(vfredmin_vs, MATCH_VFREDMIN_VS, MASK_VFREDMIN_VS) +DECLARE_INSN(vfredosum_vs, MATCH_VFREDOSUM_VS, MASK_VFREDOSUM_VS) +DECLARE_INSN(vfredusum_vs, MATCH_VFREDUSUM_VS, MASK_VFREDUSUM_VS) +DECLARE_INSN(vfrsqrt7_v, MATCH_VFRSQRT7_V, MASK_VFRSQRT7_V) +DECLARE_INSN(vfrsub_vf, MATCH_VFRSUB_VF, MASK_VFRSUB_VF) +DECLARE_INSN(vfsgnj_vf, MATCH_VFSGNJ_VF, MASK_VFSGNJ_VF) +DECLARE_INSN(vfsgnj_vv, MATCH_VFSGNJ_VV, MASK_VFSGNJ_VV) +DECLARE_INSN(vfsgnjn_vf, MATCH_VFSGNJN_VF, MASK_VFSGNJN_VF) +DECLARE_INSN(vfsgnjn_vv, MATCH_VFSGNJN_VV, MASK_VFSGNJN_VV) +DECLARE_INSN(vfsgnjx_vf, MATCH_VFSGNJX_VF, MASK_VFSGNJX_VF) +DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV) +DECLARE_INSN(vfslide1down_vf, MATCH_VFSLIDE1DOWN_VF, MASK_VFSLIDE1DOWN_VF) +DECLARE_INSN(vfslide1up_vf, MATCH_VFSLIDE1UP_VF, MASK_VFSLIDE1UP_VF) +DECLARE_INSN(vfsqrt_v, MATCH_VFSQRT_V, MASK_VFSQRT_V) +DECLARE_INSN(vfsub_vf, MATCH_VFSUB_VF, MASK_VFSUB_VF) +DECLARE_INSN(vfsub_vv, MATCH_VFSUB_VV, MASK_VFSUB_VV) +DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) +DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) +DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) +DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) +DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) +DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) +DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) +DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) +DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) +DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) +DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) +DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) +DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) +DECLARE_INSN(vfwmsac_vf, 
MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) +DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) +DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) +DECLARE_INSN(vfwmul_vv, MATCH_VFWMUL_VV, MASK_VFWMUL_VV) +DECLARE_INSN(vfwnmacc_vf, MATCH_VFWNMACC_VF, MASK_VFWNMACC_VF) +DECLARE_INSN(vfwnmacc_vv, MATCH_VFWNMACC_VV, MASK_VFWNMACC_VV) +DECLARE_INSN(vfwnmsac_vf, MATCH_VFWNMSAC_VF, MASK_VFWNMSAC_VF) +DECLARE_INSN(vfwnmsac_vv, MATCH_VFWNMSAC_VV, MASK_VFWNMSAC_VV) +DECLARE_INSN(vfwredosum_vs, MATCH_VFWREDOSUM_VS, MASK_VFWREDOSUM_VS) +DECLARE_INSN(vfwredusum_vs, MATCH_VFWREDUSUM_VS, MASK_VFWREDUSUM_VS) +DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) +DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) +DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) +DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) +DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) +DECLARE_INSN(vl1re16_v, MATCH_VL1RE16_V, MASK_VL1RE16_V) +DECLARE_INSN(vl1re32_v, MATCH_VL1RE32_V, MASK_VL1RE32_V) +DECLARE_INSN(vl1re64_v, MATCH_VL1RE64_V, MASK_VL1RE64_V) +DECLARE_INSN(vl1re8_v, MATCH_VL1RE8_V, MASK_VL1RE8_V) +DECLARE_INSN(vl2re16_v, MATCH_VL2RE16_V, MASK_VL2RE16_V) +DECLARE_INSN(vl2re32_v, MATCH_VL2RE32_V, MASK_VL2RE32_V) +DECLARE_INSN(vl2re64_v, MATCH_VL2RE64_V, MASK_VL2RE64_V) +DECLARE_INSN(vl2re8_v, MATCH_VL2RE8_V, MASK_VL2RE8_V) +DECLARE_INSN(vl4re16_v, MATCH_VL4RE16_V, MASK_VL4RE16_V) +DECLARE_INSN(vl4re32_v, MATCH_VL4RE32_V, MASK_VL4RE32_V) +DECLARE_INSN(vl4re64_v, MATCH_VL4RE64_V, MASK_VL4RE64_V) +DECLARE_INSN(vl4re8_v, MATCH_VL4RE8_V, MASK_VL4RE8_V) +DECLARE_INSN(vl8re16_v, MATCH_VL8RE16_V, MASK_VL8RE16_V) +DECLARE_INSN(vl8re32_v, MATCH_VL8RE32_V, MASK_VL8RE32_V) +DECLARE_INSN(vl8re64_v, MATCH_VL8RE64_V, MASK_VL8RE64_V) +DECLARE_INSN(vl8re8_v, MATCH_VL8RE8_V, MASK_VL8RE8_V) +DECLARE_INSN(vle1024_v, MATCH_VLE1024_V, MASK_VLE1024_V) +DECLARE_INSN(vle1024ff_v, MATCH_VLE1024FF_V, MASK_VLE1024FF_V) +DECLARE_INSN(vle128_v, 
MATCH_VLE128_V, MASK_VLE128_V) +DECLARE_INSN(vle128ff_v, MATCH_VLE128FF_V, MASK_VLE128FF_V) +DECLARE_INSN(vle16_v, MATCH_VLE16_V, MASK_VLE16_V) +DECLARE_INSN(vle16ff_v, MATCH_VLE16FF_V, MASK_VLE16FF_V) +DECLARE_INSN(vle256_v, MATCH_VLE256_V, MASK_VLE256_V) +DECLARE_INSN(vle256ff_v, MATCH_VLE256FF_V, MASK_VLE256FF_V) +DECLARE_INSN(vle32_v, MATCH_VLE32_V, MASK_VLE32_V) +DECLARE_INSN(vle32ff_v, MATCH_VLE32FF_V, MASK_VLE32FF_V) +DECLARE_INSN(vle512_v, MATCH_VLE512_V, MASK_VLE512_V) +DECLARE_INSN(vle512ff_v, MATCH_VLE512FF_V, MASK_VLE512FF_V) +DECLARE_INSN(vle64_v, MATCH_VLE64_V, MASK_VLE64_V) +DECLARE_INSN(vle64ff_v, MATCH_VLE64FF_V, MASK_VLE64FF_V) +DECLARE_INSN(vle8_v, MATCH_VLE8_V, MASK_VLE8_V) +DECLARE_INSN(vle8ff_v, MATCH_VLE8FF_V, MASK_VLE8FF_V) +DECLARE_INSN(vlm_v, MATCH_VLM_V, MASK_VLM_V) +DECLARE_INSN(vloxei1024_v, MATCH_VLOXEI1024_V, MASK_VLOXEI1024_V) +DECLARE_INSN(vloxei128_v, MATCH_VLOXEI128_V, MASK_VLOXEI128_V) +DECLARE_INSN(vloxei16_v, MATCH_VLOXEI16_V, MASK_VLOXEI16_V) +DECLARE_INSN(vloxei256_v, MATCH_VLOXEI256_V, MASK_VLOXEI256_V) +DECLARE_INSN(vloxei32_v, MATCH_VLOXEI32_V, MASK_VLOXEI32_V) +DECLARE_INSN(vloxei512_v, MATCH_VLOXEI512_V, MASK_VLOXEI512_V) +DECLARE_INSN(vloxei64_v, MATCH_VLOXEI64_V, MASK_VLOXEI64_V) +DECLARE_INSN(vloxei8_v, MATCH_VLOXEI8_V, MASK_VLOXEI8_V) +DECLARE_INSN(vlse1024_v, MATCH_VLSE1024_V, MASK_VLSE1024_V) +DECLARE_INSN(vlse128_v, MATCH_VLSE128_V, MASK_VLSE128_V) +DECLARE_INSN(vlse16_v, MATCH_VLSE16_V, MASK_VLSE16_V) +DECLARE_INSN(vlse256_v, MATCH_VLSE256_V, MASK_VLSE256_V) +DECLARE_INSN(vlse32_v, MATCH_VLSE32_V, MASK_VLSE32_V) +DECLARE_INSN(vlse512_v, MATCH_VLSE512_V, MASK_VLSE512_V) +DECLARE_INSN(vlse64_v, MATCH_VLSE64_V, MASK_VLSE64_V) +DECLARE_INSN(vlse8_v, MATCH_VLSE8_V, MASK_VLSE8_V) +DECLARE_INSN(vluxei1024_v, MATCH_VLUXEI1024_V, MASK_VLUXEI1024_V) +DECLARE_INSN(vluxei128_v, MATCH_VLUXEI128_V, MASK_VLUXEI128_V) +DECLARE_INSN(vluxei16_v, MATCH_VLUXEI16_V, MASK_VLUXEI16_V) +DECLARE_INSN(vluxei256_v, MATCH_VLUXEI256_V, 
MASK_VLUXEI256_V) +DECLARE_INSN(vluxei32_v, MATCH_VLUXEI32_V, MASK_VLUXEI32_V) +DECLARE_INSN(vluxei512_v, MATCH_VLUXEI512_V, MASK_VLUXEI512_V) +DECLARE_INSN(vluxei64_v, MATCH_VLUXEI64_V, MASK_VLUXEI64_V) +DECLARE_INSN(vluxei8_v, MATCH_VLUXEI8_V, MASK_VLUXEI8_V) +DECLARE_INSN(vmacc_vv, MATCH_VMACC_VV, MASK_VMACC_VV) +DECLARE_INSN(vmacc_vx, MATCH_VMACC_VX, MASK_VMACC_VX) +DECLARE_INSN(vmadc_vi, MATCH_VMADC_VI, MASK_VMADC_VI) +DECLARE_INSN(vmadc_vim, MATCH_VMADC_VIM, MASK_VMADC_VIM) +DECLARE_INSN(vmadc_vv, MATCH_VMADC_VV, MASK_VMADC_VV) +DECLARE_INSN(vmadc_vvm, MATCH_VMADC_VVM, MASK_VMADC_VVM) +DECLARE_INSN(vmadc_vx, MATCH_VMADC_VX, MASK_VMADC_VX) +DECLARE_INSN(vmadc_vxm, MATCH_VMADC_VXM, MASK_VMADC_VXM) +DECLARE_INSN(vmadd_vv, MATCH_VMADD_VV, MASK_VMADD_VV) +DECLARE_INSN(vmadd_vx, MATCH_VMADD_VX, MASK_VMADD_VX) +DECLARE_INSN(vmand_mm, MATCH_VMAND_MM, MASK_VMAND_MM) +DECLARE_INSN(vmandn_mm, MATCH_VMANDN_MM, MASK_VMANDN_MM) +DECLARE_INSN(vmax_vv, MATCH_VMAX_VV, MASK_VMAX_VV) +DECLARE_INSN(vmax_vx, MATCH_VMAX_VX, MASK_VMAX_VX) +DECLARE_INSN(vmaxu_vv, MATCH_VMAXU_VV, MASK_VMAXU_VV) +DECLARE_INSN(vmaxu_vx, MATCH_VMAXU_VX, MASK_VMAXU_VX) +DECLARE_INSN(vmerge_vim, MATCH_VMERGE_VIM, MASK_VMERGE_VIM) +DECLARE_INSN(vmerge_vvm, MATCH_VMERGE_VVM, MASK_VMERGE_VVM) +DECLARE_INSN(vmerge_vxm, MATCH_VMERGE_VXM, MASK_VMERGE_VXM) +DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF) +DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV) +DECLARE_INSN(vmfge_vf, MATCH_VMFGE_VF, MASK_VMFGE_VF) +DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF) +DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF) +DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV) +DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF) +DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV) +DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF) +DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV) +DECLARE_INSN(vmin_vv, MATCH_VMIN_VV, MASK_VMIN_VV) +DECLARE_INSN(vmin_vx, MATCH_VMIN_VX, MASK_VMIN_VX) 
+DECLARE_INSN(vminu_vv, MATCH_VMINU_VV, MASK_VMINU_VV) +DECLARE_INSN(vminu_vx, MATCH_VMINU_VX, MASK_VMINU_VX) +DECLARE_INSN(vmnand_mm, MATCH_VMNAND_MM, MASK_VMNAND_MM) +DECLARE_INSN(vmnor_mm, MATCH_VMNOR_MM, MASK_VMNOR_MM) +DECLARE_INSN(vmor_mm, MATCH_VMOR_MM, MASK_VMOR_MM) +DECLARE_INSN(vmorn_mm, MATCH_VMORN_MM, MASK_VMORN_MM) +DECLARE_INSN(vmsbc_vv, MATCH_VMSBC_VV, MASK_VMSBC_VV) +DECLARE_INSN(vmsbc_vvm, MATCH_VMSBC_VVM, MASK_VMSBC_VVM) +DECLARE_INSN(vmsbc_vx, MATCH_VMSBC_VX, MASK_VMSBC_VX) +DECLARE_INSN(vmsbc_vxm, MATCH_VMSBC_VXM, MASK_VMSBC_VXM) +DECLARE_INSN(vmsbf_m, MATCH_VMSBF_M, MASK_VMSBF_M) +DECLARE_INSN(vmseq_vi, MATCH_VMSEQ_VI, MASK_VMSEQ_VI) +DECLARE_INSN(vmseq_vv, MATCH_VMSEQ_VV, MASK_VMSEQ_VV) +DECLARE_INSN(vmseq_vx, MATCH_VMSEQ_VX, MASK_VMSEQ_VX) +DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI) +DECLARE_INSN(vmsgt_vx, MATCH_VMSGT_VX, MASK_VMSGT_VX) +DECLARE_INSN(vmsgtu_vi, MATCH_VMSGTU_VI, MASK_VMSGTU_VI) +DECLARE_INSN(vmsgtu_vx, MATCH_VMSGTU_VX, MASK_VMSGTU_VX) +DECLARE_INSN(vmsif_m, MATCH_VMSIF_M, MASK_VMSIF_M) +DECLARE_INSN(vmsle_vi, MATCH_VMSLE_VI, MASK_VMSLE_VI) +DECLARE_INSN(vmsle_vv, MATCH_VMSLE_VV, MASK_VMSLE_VV) +DECLARE_INSN(vmsle_vx, MATCH_VMSLE_VX, MASK_VMSLE_VX) +DECLARE_INSN(vmsleu_vi, MATCH_VMSLEU_VI, MASK_VMSLEU_VI) +DECLARE_INSN(vmsleu_vv, MATCH_VMSLEU_VV, MASK_VMSLEU_VV) +DECLARE_INSN(vmsleu_vx, MATCH_VMSLEU_VX, MASK_VMSLEU_VX) +DECLARE_INSN(vmslt_vv, MATCH_VMSLT_VV, MASK_VMSLT_VV) +DECLARE_INSN(vmslt_vx, MATCH_VMSLT_VX, MASK_VMSLT_VX) +DECLARE_INSN(vmsltu_vv, MATCH_VMSLTU_VV, MASK_VMSLTU_VV) +DECLARE_INSN(vmsltu_vx, MATCH_VMSLTU_VX, MASK_VMSLTU_VX) +DECLARE_INSN(vmsne_vi, MATCH_VMSNE_VI, MASK_VMSNE_VI) +DECLARE_INSN(vmsne_vv, MATCH_VMSNE_VV, MASK_VMSNE_VV) +DECLARE_INSN(vmsne_vx, MATCH_VMSNE_VX, MASK_VMSNE_VX) +DECLARE_INSN(vmsof_m, MATCH_VMSOF_M, MASK_VMSOF_M) +DECLARE_INSN(vmul_vv, MATCH_VMUL_VV, MASK_VMUL_VV) +DECLARE_INSN(vmul_vx, MATCH_VMUL_VX, MASK_VMUL_VX) +DECLARE_INSN(vmulh_vv, MATCH_VMULH_VV, MASK_VMULH_VV) 
+DECLARE_INSN(vmulh_vx, MATCH_VMULH_VX, MASK_VMULH_VX) +DECLARE_INSN(vmulhsu_vv, MATCH_VMULHSU_VV, MASK_VMULHSU_VV) +DECLARE_INSN(vmulhsu_vx, MATCH_VMULHSU_VX, MASK_VMULHSU_VX) +DECLARE_INSN(vmulhu_vv, MATCH_VMULHU_VV, MASK_VMULHU_VV) +DECLARE_INSN(vmulhu_vx, MATCH_VMULHU_VX, MASK_VMULHU_VX) +DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V) +DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V) +DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V) +DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V) +DECLARE_INSN(vmv_s_x, MATCH_VMV_S_X, MASK_VMV_S_X) +DECLARE_INSN(vmv_v_i, MATCH_VMV_V_I, MASK_VMV_V_I) +DECLARE_INSN(vmv_v_v, MATCH_VMV_V_V, MASK_VMV_V_V) +DECLARE_INSN(vmv_v_x, MATCH_VMV_V_X, MASK_VMV_V_X) +DECLARE_INSN(vmv_x_s, MATCH_VMV_X_S, MASK_VMV_X_S) +DECLARE_INSN(vmxnor_mm, MATCH_VMXNOR_MM, MASK_VMXNOR_MM) +DECLARE_INSN(vmxor_mm, MATCH_VMXOR_MM, MASK_VMXOR_MM) +DECLARE_INSN(vnclip_wi, MATCH_VNCLIP_WI, MASK_VNCLIP_WI) +DECLARE_INSN(vnclip_wv, MATCH_VNCLIP_WV, MASK_VNCLIP_WV) +DECLARE_INSN(vnclip_wx, MATCH_VNCLIP_WX, MASK_VNCLIP_WX) +DECLARE_INSN(vnclipu_wi, MATCH_VNCLIPU_WI, MASK_VNCLIPU_WI) +DECLARE_INSN(vnclipu_wv, MATCH_VNCLIPU_WV, MASK_VNCLIPU_WV) +DECLARE_INSN(vnclipu_wx, MATCH_VNCLIPU_WX, MASK_VNCLIPU_WX) +DECLARE_INSN(vnmsac_vv, MATCH_VNMSAC_VV, MASK_VNMSAC_VV) +DECLARE_INSN(vnmsac_vx, MATCH_VNMSAC_VX, MASK_VNMSAC_VX) +DECLARE_INSN(vnmsub_vv, MATCH_VNMSUB_VV, MASK_VNMSUB_VV) +DECLARE_INSN(vnmsub_vx, MATCH_VNMSUB_VX, MASK_VNMSUB_VX) +DECLARE_INSN(vnsra_wi, MATCH_VNSRA_WI, MASK_VNSRA_WI) +DECLARE_INSN(vnsra_wv, MATCH_VNSRA_WV, MASK_VNSRA_WV) +DECLARE_INSN(vnsra_wx, MATCH_VNSRA_WX, MASK_VNSRA_WX) +DECLARE_INSN(vnsrl_wi, MATCH_VNSRL_WI, MASK_VNSRL_WI) +DECLARE_INSN(vnsrl_wv, MATCH_VNSRL_WV, MASK_VNSRL_WV) +DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) +DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) +DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) +DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) +DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, 
MASK_VREDAND_VS) +DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS) +DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS) +DECLARE_INSN(vredmin_vs, MATCH_VREDMIN_VS, MASK_VREDMIN_VS) +DECLARE_INSN(vredminu_vs, MATCH_VREDMINU_VS, MASK_VREDMINU_VS) +DECLARE_INSN(vredor_vs, MATCH_VREDOR_VS, MASK_VREDOR_VS) +DECLARE_INSN(vredsum_vs, MATCH_VREDSUM_VS, MASK_VREDSUM_VS) +DECLARE_INSN(vredxor_vs, MATCH_VREDXOR_VS, MASK_VREDXOR_VS) +DECLARE_INSN(vrem_vv, MATCH_VREM_VV, MASK_VREM_VV) +DECLARE_INSN(vrem_vx, MATCH_VREM_VX, MASK_VREM_VX) +DECLARE_INSN(vremu_vv, MATCH_VREMU_VV, MASK_VREMU_VV) +DECLARE_INSN(vremu_vx, MATCH_VREMU_VX, MASK_VREMU_VX) +DECLARE_INSN(vrgather_vi, MATCH_VRGATHER_VI, MASK_VRGATHER_VI) +DECLARE_INSN(vrgather_vv, MATCH_VRGATHER_VV, MASK_VRGATHER_VV) +DECLARE_INSN(vrgather_vx, MATCH_VRGATHER_VX, MASK_VRGATHER_VX) +DECLARE_INSN(vrgatherei16_vv, MATCH_VRGATHEREI16_VV, MASK_VRGATHEREI16_VV) +DECLARE_INSN(vrsub_vi, MATCH_VRSUB_VI, MASK_VRSUB_VI) +DECLARE_INSN(vrsub_vx, MATCH_VRSUB_VX, MASK_VRSUB_VX) +DECLARE_INSN(vs1r_v, MATCH_VS1R_V, MASK_VS1R_V) +DECLARE_INSN(vs2r_v, MATCH_VS2R_V, MASK_VS2R_V) +DECLARE_INSN(vs4r_v, MATCH_VS4R_V, MASK_VS4R_V) +DECLARE_INSN(vs8r_v, MATCH_VS8R_V, MASK_VS8R_V) +DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI) +DECLARE_INSN(vsadd_vv, MATCH_VSADD_VV, MASK_VSADD_VV) +DECLARE_INSN(vsadd_vx, MATCH_VSADD_VX, MASK_VSADD_VX) +DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI) +DECLARE_INSN(vsaddu_vv, MATCH_VSADDU_VV, MASK_VSADDU_VV) +DECLARE_INSN(vsaddu_vx, MATCH_VSADDU_VX, MASK_VSADDU_VX) +DECLARE_INSN(vsbc_vvm, MATCH_VSBC_VVM, MASK_VSBC_VVM) +DECLARE_INSN(vsbc_vxm, MATCH_VSBC_VXM, MASK_VSBC_VXM) +DECLARE_INSN(vse1024_v, MATCH_VSE1024_V, MASK_VSE1024_V) +DECLARE_INSN(vse128_v, MATCH_VSE128_V, MASK_VSE128_V) +DECLARE_INSN(vse16_v, MATCH_VSE16_V, MASK_VSE16_V) +DECLARE_INSN(vse256_v, MATCH_VSE256_V, MASK_VSE256_V) +DECLARE_INSN(vse32_v, MATCH_VSE32_V, MASK_VSE32_V) +DECLARE_INSN(vse512_v, MATCH_VSE512_V, 
MASK_VSE512_V) +DECLARE_INSN(vse64_v, MATCH_VSE64_V, MASK_VSE64_V) +DECLARE_INSN(vse8_v, MATCH_VSE8_V, MASK_VSE8_V) +DECLARE_INSN(vsetivli, MATCH_VSETIVLI, MASK_VSETIVLI) +DECLARE_INSN(vsetvl, MATCH_VSETVL, MASK_VSETVL) +DECLARE_INSN(vsetvli, MATCH_VSETVLI, MASK_VSETVLI) +DECLARE_INSN(vsext_vf2, MATCH_VSEXT_VF2, MASK_VSEXT_VF2) +DECLARE_INSN(vsext_vf4, MATCH_VSEXT_VF4, MASK_VSEXT_VF4) +DECLARE_INSN(vsext_vf8, MATCH_VSEXT_VF8, MASK_VSEXT_VF8) +DECLARE_INSN(vslide1down_vx, MATCH_VSLIDE1DOWN_VX, MASK_VSLIDE1DOWN_VX) +DECLARE_INSN(vslide1up_vx, MATCH_VSLIDE1UP_VX, MASK_VSLIDE1UP_VX) +DECLARE_INSN(vslidedown_vi, MATCH_VSLIDEDOWN_VI, MASK_VSLIDEDOWN_VI) +DECLARE_INSN(vslidedown_vx, MATCH_VSLIDEDOWN_VX, MASK_VSLIDEDOWN_VX) +DECLARE_INSN(vslideup_vi, MATCH_VSLIDEUP_VI, MASK_VSLIDEUP_VI) +DECLARE_INSN(vslideup_vx, MATCH_VSLIDEUP_VX, MASK_VSLIDEUP_VX) +DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) +DECLARE_INSN(vsll_vv, MATCH_VSLL_VV, MASK_VSLL_VV) +DECLARE_INSN(vsll_vx, MATCH_VSLL_VX, MASK_VSLL_VX) +DECLARE_INSN(vsm_v, MATCH_VSM_V, MASK_VSM_V) +DECLARE_INSN(vsmul_vv, MATCH_VSMUL_VV, MASK_VSMUL_VV) +DECLARE_INSN(vsmul_vx, MATCH_VSMUL_VX, MASK_VSMUL_VX) +DECLARE_INSN(vsoxei1024_v, MATCH_VSOXEI1024_V, MASK_VSOXEI1024_V) +DECLARE_INSN(vsoxei128_v, MATCH_VSOXEI128_V, MASK_VSOXEI128_V) +DECLARE_INSN(vsoxei16_v, MATCH_VSOXEI16_V, MASK_VSOXEI16_V) +DECLARE_INSN(vsoxei256_v, MATCH_VSOXEI256_V, MASK_VSOXEI256_V) +DECLARE_INSN(vsoxei32_v, MATCH_VSOXEI32_V, MASK_VSOXEI32_V) +DECLARE_INSN(vsoxei512_v, MATCH_VSOXEI512_V, MASK_VSOXEI512_V) +DECLARE_INSN(vsoxei64_v, MATCH_VSOXEI64_V, MASK_VSOXEI64_V) +DECLARE_INSN(vsoxei8_v, MATCH_VSOXEI8_V, MASK_VSOXEI8_V) +DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI) +DECLARE_INSN(vsra_vv, MATCH_VSRA_VV, MASK_VSRA_VV) +DECLARE_INSN(vsra_vx, MATCH_VSRA_VX, MASK_VSRA_VX) +DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI) +DECLARE_INSN(vsrl_vv, MATCH_VSRL_VV, MASK_VSRL_VV) +DECLARE_INSN(vsrl_vx, MATCH_VSRL_VX, MASK_VSRL_VX) 
+DECLARE_INSN(vsse1024_v, MATCH_VSSE1024_V, MASK_VSSE1024_V) +DECLARE_INSN(vsse128_v, MATCH_VSSE128_V, MASK_VSSE128_V) +DECLARE_INSN(vsse16_v, MATCH_VSSE16_V, MASK_VSSE16_V) +DECLARE_INSN(vsse256_v, MATCH_VSSE256_V, MASK_VSSE256_V) +DECLARE_INSN(vsse32_v, MATCH_VSSE32_V, MASK_VSSE32_V) +DECLARE_INSN(vsse512_v, MATCH_VSSE512_V, MASK_VSSE512_V) +DECLARE_INSN(vsse64_v, MATCH_VSSE64_V, MASK_VSSE64_V) +DECLARE_INSN(vsse8_v, MATCH_VSSE8_V, MASK_VSSE8_V) +DECLARE_INSN(vssra_vi, MATCH_VSSRA_VI, MASK_VSSRA_VI) +DECLARE_INSN(vssra_vv, MATCH_VSSRA_VV, MASK_VSSRA_VV) +DECLARE_INSN(vssra_vx, MATCH_VSSRA_VX, MASK_VSSRA_VX) +DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI) +DECLARE_INSN(vssrl_vv, MATCH_VSSRL_VV, MASK_VSSRL_VV) +DECLARE_INSN(vssrl_vx, MATCH_VSSRL_VX, MASK_VSSRL_VX) +DECLARE_INSN(vssub_vv, MATCH_VSSUB_VV, MASK_VSSUB_VV) +DECLARE_INSN(vssub_vx, MATCH_VSSUB_VX, MASK_VSSUB_VX) +DECLARE_INSN(vssubu_vv, MATCH_VSSUBU_VV, MASK_VSSUBU_VV) +DECLARE_INSN(vssubu_vx, MATCH_VSSUBU_VX, MASK_VSSUBU_VX) +DECLARE_INSN(vsub_vv, MATCH_VSUB_VV, MASK_VSUB_VV) +DECLARE_INSN(vsub_vx, MATCH_VSUB_VX, MASK_VSUB_VX) +DECLARE_INSN(vsuxei1024_v, MATCH_VSUXEI1024_V, MASK_VSUXEI1024_V) +DECLARE_INSN(vsuxei128_v, MATCH_VSUXEI128_V, MASK_VSUXEI128_V) +DECLARE_INSN(vsuxei16_v, MATCH_VSUXEI16_V, MASK_VSUXEI16_V) +DECLARE_INSN(vsuxei256_v, MATCH_VSUXEI256_V, MASK_VSUXEI256_V) +DECLARE_INSN(vsuxei32_v, MATCH_VSUXEI32_V, MASK_VSUXEI32_V) +DECLARE_INSN(vsuxei512_v, MATCH_VSUXEI512_V, MASK_VSUXEI512_V) +DECLARE_INSN(vsuxei64_v, MATCH_VSUXEI64_V, MASK_VSUXEI64_V) +DECLARE_INSN(vsuxei8_v, MATCH_VSUXEI8_V, MASK_VSUXEI8_V) +DECLARE_INSN(vwadd_vv, MATCH_VWADD_VV, MASK_VWADD_VV) +DECLARE_INSN(vwadd_vx, MATCH_VWADD_VX, MASK_VWADD_VX) +DECLARE_INSN(vwadd_wv, MATCH_VWADD_WV, MASK_VWADD_WV) +DECLARE_INSN(vwadd_wx, MATCH_VWADD_WX, MASK_VWADD_WX) +DECLARE_INSN(vwaddu_vv, MATCH_VWADDU_VV, MASK_VWADDU_VV) +DECLARE_INSN(vwaddu_vx, MATCH_VWADDU_VX, MASK_VWADDU_VX) +DECLARE_INSN(vwaddu_wv, MATCH_VWADDU_WV, 
MASK_VWADDU_WV) +DECLARE_INSN(vwaddu_wx, MATCH_VWADDU_WX, MASK_VWADDU_WX) +DECLARE_INSN(vwmacc_vv, MATCH_VWMACC_VV, MASK_VWMACC_VV) +DECLARE_INSN(vwmacc_vx, MATCH_VWMACC_VX, MASK_VWMACC_VX) +DECLARE_INSN(vwmaccsu_vv, MATCH_VWMACCSU_VV, MASK_VWMACCSU_VV) +DECLARE_INSN(vwmaccsu_vx, MATCH_VWMACCSU_VX, MASK_VWMACCSU_VX) +DECLARE_INSN(vwmaccu_vv, MATCH_VWMACCU_VV, MASK_VWMACCU_VV) +DECLARE_INSN(vwmaccu_vx, MATCH_VWMACCU_VX, MASK_VWMACCU_VX) +DECLARE_INSN(vwmaccus_vx, MATCH_VWMACCUS_VX, MASK_VWMACCUS_VX) +DECLARE_INSN(vwmul_vv, MATCH_VWMUL_VV, MASK_VWMUL_VV) +DECLARE_INSN(vwmul_vx, MATCH_VWMUL_VX, MASK_VWMUL_VX) +DECLARE_INSN(vwmulsu_vv, MATCH_VWMULSU_VV, MASK_VWMULSU_VV) +DECLARE_INSN(vwmulsu_vx, MATCH_VWMULSU_VX, MASK_VWMULSU_VX) +DECLARE_INSN(vwmulu_vv, MATCH_VWMULU_VV, MASK_VWMULU_VV) +DECLARE_INSN(vwmulu_vx, MATCH_VWMULU_VX, MASK_VWMULU_VX) +DECLARE_INSN(vwredsum_vs, MATCH_VWREDSUM_VS, MASK_VWREDSUM_VS) +DECLARE_INSN(vwredsumu_vs, MATCH_VWREDSUMU_VS, MASK_VWREDSUMU_VS) +DECLARE_INSN(vwsub_vv, MATCH_VWSUB_VV, MASK_VWSUB_VV) +DECLARE_INSN(vwsub_vx, MATCH_VWSUB_VX, MASK_VWSUB_VX) +DECLARE_INSN(vwsub_wv, MATCH_VWSUB_WV, MASK_VWSUB_WV) +DECLARE_INSN(vwsub_wx, MATCH_VWSUB_WX, MASK_VWSUB_WX) +DECLARE_INSN(vwsubu_vv, MATCH_VWSUBU_VV, MASK_VWSUBU_VV) +DECLARE_INSN(vwsubu_vx, MATCH_VWSUBU_VX, MASK_VWSUBU_VX) +DECLARE_INSN(vwsubu_wv, MATCH_VWSUBU_WV, MASK_VWSUBU_WV) +DECLARE_INSN(vwsubu_wx, MATCH_VWSUBU_WX, MASK_VWSUBU_WX) +DECLARE_INSN(vxor_vi, MATCH_VXOR_VI, MASK_VXOR_VI) +DECLARE_INSN(vxor_vv, MATCH_VXOR_VV, MASK_VXOR_VV) +DECLARE_INSN(vxor_vx, MATCH_VXOR_VX, MASK_VXOR_VX) +DECLARE_INSN(vzext_vf2, MATCH_VZEXT_VF2, MASK_VZEXT_VF2) +DECLARE_INSN(vzext_vf4, MATCH_VZEXT_VF4, MASK_VZEXT_VF4) +DECLARE_INSN(vzext_vf8, MATCH_VZEXT_VF8, MASK_VZEXT_VF8) +DECLARE_INSN(wfi, MATCH_WFI, MASK_WFI) +DECLARE_INSN(wrs_nto, MATCH_WRS_NTO, MASK_WRS_NTO) +DECLARE_INSN(wrs_sto, MATCH_WRS_STO, MASK_WRS_STO) +DECLARE_INSN(xnor, MATCH_XNOR, MASK_XNOR) +DECLARE_INSN(xor, MATCH_XOR, MASK_XOR) 
+DECLARE_INSN(xori, MATCH_XORI, MASK_XORI) +DECLARE_INSN(xperm16, MATCH_XPERM16, MASK_XPERM16) +DECLARE_INSN(xperm32, MATCH_XPERM32, MASK_XPERM32) +DECLARE_INSN(xperm4, MATCH_XPERM4, MASK_XPERM4) +DECLARE_INSN(xperm8, MATCH_XPERM8, MASK_XPERM8) +DECLARE_INSN(zunpkd810, MATCH_ZUNPKD810, MASK_ZUNPKD810) +DECLARE_INSN(zunpkd820, MATCH_ZUNPKD820, MASK_ZUNPKD820) +DECLARE_INSN(zunpkd830, MATCH_ZUNPKD830, MASK_ZUNPKD830) +DECLARE_INSN(zunpkd831, MATCH_ZUNPKD831, MASK_ZUNPKD831) +DECLARE_INSN(zunpkd832, MATCH_ZUNPKD832, MASK_ZUNPKD832) +#endif +#ifdef DECLARE_CSR +DECLARE_CSR(fflags, CSR_FFLAGS) +DECLARE_CSR(frm, CSR_FRM) +DECLARE_CSR(fcsr, CSR_FCSR) +DECLARE_CSR(vstart, CSR_VSTART) +DECLARE_CSR(vxsat, CSR_VXSAT) +DECLARE_CSR(vxrm, CSR_VXRM) +DECLARE_CSR(vcsr, CSR_VCSR) +DECLARE_CSR(seed, CSR_SEED) +DECLARE_CSR(jvt, CSR_JVT) +DECLARE_CSR(cycle, CSR_CYCLE) +DECLARE_CSR(time, CSR_TIME) +DECLARE_CSR(instret, CSR_INSTRET) +DECLARE_CSR(hpmcounter3, CSR_HPMCOUNTER3) +DECLARE_CSR(hpmcounter4, CSR_HPMCOUNTER4) +DECLARE_CSR(hpmcounter5, CSR_HPMCOUNTER5) +DECLARE_CSR(hpmcounter6, CSR_HPMCOUNTER6) +DECLARE_CSR(hpmcounter7, CSR_HPMCOUNTER7) +DECLARE_CSR(hpmcounter8, CSR_HPMCOUNTER8) +DECLARE_CSR(hpmcounter9, CSR_HPMCOUNTER9) +DECLARE_CSR(hpmcounter10, CSR_HPMCOUNTER10) +DECLARE_CSR(hpmcounter11, CSR_HPMCOUNTER11) +DECLARE_CSR(hpmcounter12, CSR_HPMCOUNTER12) +DECLARE_CSR(hpmcounter13, CSR_HPMCOUNTER13) +DECLARE_CSR(hpmcounter14, CSR_HPMCOUNTER14) +DECLARE_CSR(hpmcounter15, CSR_HPMCOUNTER15) +DECLARE_CSR(hpmcounter16, CSR_HPMCOUNTER16) +DECLARE_CSR(hpmcounter17, CSR_HPMCOUNTER17) +DECLARE_CSR(hpmcounter18, CSR_HPMCOUNTER18) +DECLARE_CSR(hpmcounter19, CSR_HPMCOUNTER19) +DECLARE_CSR(hpmcounter20, CSR_HPMCOUNTER20) +DECLARE_CSR(hpmcounter21, CSR_HPMCOUNTER21) +DECLARE_CSR(hpmcounter22, CSR_HPMCOUNTER22) +DECLARE_CSR(hpmcounter23, CSR_HPMCOUNTER23) +DECLARE_CSR(hpmcounter24, CSR_HPMCOUNTER24) +DECLARE_CSR(hpmcounter25, CSR_HPMCOUNTER25) +DECLARE_CSR(hpmcounter26, CSR_HPMCOUNTER26) 
+DECLARE_CSR(hpmcounter27, CSR_HPMCOUNTER27) +DECLARE_CSR(hpmcounter28, CSR_HPMCOUNTER28) +DECLARE_CSR(hpmcounter29, CSR_HPMCOUNTER29) +DECLARE_CSR(hpmcounter30, CSR_HPMCOUNTER30) +DECLARE_CSR(hpmcounter31, CSR_HPMCOUNTER31) +DECLARE_CSR(vl, CSR_VL) +DECLARE_CSR(vtype, CSR_VTYPE) +DECLARE_CSR(vlenb, CSR_VLENB) +DECLARE_CSR(sstatus, CSR_SSTATUS) +DECLARE_CSR(sedeleg, CSR_SEDELEG) +DECLARE_CSR(sideleg, CSR_SIDELEG) +DECLARE_CSR(sie, CSR_SIE) +DECLARE_CSR(stvec, CSR_STVEC) +DECLARE_CSR(scounteren, CSR_SCOUNTEREN) +DECLARE_CSR(senvcfg, CSR_SENVCFG) +DECLARE_CSR(sstateen0, CSR_SSTATEEN0) +DECLARE_CSR(sstateen1, CSR_SSTATEEN1) +DECLARE_CSR(sstateen2, CSR_SSTATEEN2) +DECLARE_CSR(sstateen3, CSR_SSTATEEN3) +DECLARE_CSR(sscratch, CSR_SSCRATCH) +DECLARE_CSR(sepc, CSR_SEPC) +DECLARE_CSR(scause, CSR_SCAUSE) +DECLARE_CSR(stval, CSR_STVAL) +DECLARE_CSR(sip, CSR_SIP) +DECLARE_CSR(stimecmp, CSR_STIMECMP) +DECLARE_CSR(siselect, CSR_SISELECT) +DECLARE_CSR(sireg, CSR_SIREG) +DECLARE_CSR(stopei, CSR_STOPEI) +DECLARE_CSR(satp, CSR_SATP) +DECLARE_CSR(scontext, CSR_SCONTEXT) +DECLARE_CSR(vsstatus, CSR_VSSTATUS) +DECLARE_CSR(vsie, CSR_VSIE) +DECLARE_CSR(vstvec, CSR_VSTVEC) +DECLARE_CSR(vsscratch, CSR_VSSCRATCH) +DECLARE_CSR(vsepc, CSR_VSEPC) +DECLARE_CSR(vscause, CSR_VSCAUSE) +DECLARE_CSR(vstval, CSR_VSTVAL) +DECLARE_CSR(vsip, CSR_VSIP) +DECLARE_CSR(vstimecmp, CSR_VSTIMECMP) +DECLARE_CSR(vsiselect, CSR_VSISELECT) +DECLARE_CSR(vsireg, CSR_VSIREG) +DECLARE_CSR(vstopei, CSR_VSTOPEI) +DECLARE_CSR(vsatp, CSR_VSATP) +DECLARE_CSR(hstatus, CSR_HSTATUS) +DECLARE_CSR(hedeleg, CSR_HEDELEG) +DECLARE_CSR(hideleg, CSR_HIDELEG) +DECLARE_CSR(hie, CSR_HIE) +DECLARE_CSR(htimedelta, CSR_HTIMEDELTA) +DECLARE_CSR(hcounteren, CSR_HCOUNTEREN) +DECLARE_CSR(hgeie, CSR_HGEIE) +DECLARE_CSR(hvien, CSR_HVIEN) +DECLARE_CSR(hvictl, CSR_HVICTL) +DECLARE_CSR(henvcfg, CSR_HENVCFG) +DECLARE_CSR(hstateen0, CSR_HSTATEEN0) +DECLARE_CSR(hstateen1, CSR_HSTATEEN1) +DECLARE_CSR(hstateen2, CSR_HSTATEEN2) +DECLARE_CSR(hstateen3, 
CSR_HSTATEEN3) +DECLARE_CSR(htval, CSR_HTVAL) +DECLARE_CSR(hip, CSR_HIP) +DECLARE_CSR(hvip, CSR_HVIP) +DECLARE_CSR(hviprio1, CSR_HVIPRIO1) +DECLARE_CSR(hviprio2, CSR_HVIPRIO2) +DECLARE_CSR(htinst, CSR_HTINST) +DECLARE_CSR(hgatp, CSR_HGATP) +DECLARE_CSR(hcontext, CSR_HCONTEXT) +DECLARE_CSR(hgeip, CSR_HGEIP) +DECLARE_CSR(vstopi, CSR_VSTOPI) +DECLARE_CSR(scountovf, CSR_SCOUNTOVF) +DECLARE_CSR(stopi, CSR_STOPI) +DECLARE_CSR(utvt, CSR_UTVT) +DECLARE_CSR(unxti, CSR_UNXTI) +DECLARE_CSR(uintstatus, CSR_UINTSTATUS) +DECLARE_CSR(uscratchcsw, CSR_USCRATCHCSW) +DECLARE_CSR(uscratchcswl, CSR_USCRATCHCSWL) +DECLARE_CSR(stvt, CSR_STVT) +DECLARE_CSR(snxti, CSR_SNXTI) +DECLARE_CSR(sintstatus, CSR_SINTSTATUS) +DECLARE_CSR(sscratchcsw, CSR_SSCRATCHCSW) +DECLARE_CSR(sscratchcswl, CSR_SSCRATCHCSWL) +DECLARE_CSR(mtvt, CSR_MTVT) +DECLARE_CSR(mnxti, CSR_MNXTI) +DECLARE_CSR(mintstatus, CSR_MINTSTATUS) +DECLARE_CSR(mscratchcsw, CSR_MSCRATCHCSW) +DECLARE_CSR(mscratchcswl, CSR_MSCRATCHCSWL) +DECLARE_CSR(mstatus, CSR_MSTATUS) +DECLARE_CSR(misa, CSR_MISA) +DECLARE_CSR(medeleg, CSR_MEDELEG) +DECLARE_CSR(mideleg, CSR_MIDELEG) +DECLARE_CSR(mie, CSR_MIE) +DECLARE_CSR(mtvec, CSR_MTVEC) +DECLARE_CSR(mcounteren, CSR_MCOUNTEREN) +DECLARE_CSR(mvien, CSR_MVIEN) +DECLARE_CSR(mvip, CSR_MVIP) +DECLARE_CSR(menvcfg, CSR_MENVCFG) +DECLARE_CSR(mstateen0, CSR_MSTATEEN0) +DECLARE_CSR(mstateen1, CSR_MSTATEEN1) +DECLARE_CSR(mstateen2, CSR_MSTATEEN2) +DECLARE_CSR(mstateen3, CSR_MSTATEEN3) +DECLARE_CSR(mcountinhibit, CSR_MCOUNTINHIBIT) +DECLARE_CSR(mscratch, CSR_MSCRATCH) +DECLARE_CSR(mepc, CSR_MEPC) +DECLARE_CSR(mcause, CSR_MCAUSE) +DECLARE_CSR(mtval, CSR_MTVAL) +DECLARE_CSR(mip, CSR_MIP) +DECLARE_CSR(mtinst, CSR_MTINST) +DECLARE_CSR(mtval2, CSR_MTVAL2) +DECLARE_CSR(miselect, CSR_MISELECT) +DECLARE_CSR(mireg, CSR_MIREG) +DECLARE_CSR(mtopei, CSR_MTOPEI) +DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) +DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) +DECLARE_CSR(pmpcfg2, CSR_PMPCFG2) +DECLARE_CSR(pmpcfg3, CSR_PMPCFG3) +DECLARE_CSR(pmpcfg4, 
CSR_PMPCFG4) +DECLARE_CSR(pmpcfg5, CSR_PMPCFG5) +DECLARE_CSR(pmpcfg6, CSR_PMPCFG6) +DECLARE_CSR(pmpcfg7, CSR_PMPCFG7) +DECLARE_CSR(pmpcfg8, CSR_PMPCFG8) +DECLARE_CSR(pmpcfg9, CSR_PMPCFG9) +DECLARE_CSR(pmpcfg10, CSR_PMPCFG10) +DECLARE_CSR(pmpcfg11, CSR_PMPCFG11) +DECLARE_CSR(pmpcfg12, CSR_PMPCFG12) +DECLARE_CSR(pmpcfg13, CSR_PMPCFG13) +DECLARE_CSR(pmpcfg14, CSR_PMPCFG14) +DECLARE_CSR(pmpcfg15, CSR_PMPCFG15) +DECLARE_CSR(pmpaddr0, CSR_PMPADDR0) +DECLARE_CSR(pmpaddr1, CSR_PMPADDR1) +DECLARE_CSR(pmpaddr2, CSR_PMPADDR2) +DECLARE_CSR(pmpaddr3, CSR_PMPADDR3) +DECLARE_CSR(pmpaddr4, CSR_PMPADDR4) +DECLARE_CSR(pmpaddr5, CSR_PMPADDR5) +DECLARE_CSR(pmpaddr6, CSR_PMPADDR6) +DECLARE_CSR(pmpaddr7, CSR_PMPADDR7) +DECLARE_CSR(pmpaddr8, CSR_PMPADDR8) +DECLARE_CSR(pmpaddr9, CSR_PMPADDR9) +DECLARE_CSR(pmpaddr10, CSR_PMPADDR10) +DECLARE_CSR(pmpaddr11, CSR_PMPADDR11) +DECLARE_CSR(pmpaddr12, CSR_PMPADDR12) +DECLARE_CSR(pmpaddr13, CSR_PMPADDR13) +DECLARE_CSR(pmpaddr14, CSR_PMPADDR14) +DECLARE_CSR(pmpaddr15, CSR_PMPADDR15) +DECLARE_CSR(pmpaddr16, CSR_PMPADDR16) +DECLARE_CSR(pmpaddr17, CSR_PMPADDR17) +DECLARE_CSR(pmpaddr18, CSR_PMPADDR18) +DECLARE_CSR(pmpaddr19, CSR_PMPADDR19) +DECLARE_CSR(pmpaddr20, CSR_PMPADDR20) +DECLARE_CSR(pmpaddr21, CSR_PMPADDR21) +DECLARE_CSR(pmpaddr22, CSR_PMPADDR22) +DECLARE_CSR(pmpaddr23, CSR_PMPADDR23) +DECLARE_CSR(pmpaddr24, CSR_PMPADDR24) +DECLARE_CSR(pmpaddr25, CSR_PMPADDR25) +DECLARE_CSR(pmpaddr26, CSR_PMPADDR26) +DECLARE_CSR(pmpaddr27, CSR_PMPADDR27) +DECLARE_CSR(pmpaddr28, CSR_PMPADDR28) +DECLARE_CSR(pmpaddr29, CSR_PMPADDR29) +DECLARE_CSR(pmpaddr30, CSR_PMPADDR30) +DECLARE_CSR(pmpaddr31, CSR_PMPADDR31) +DECLARE_CSR(pmpaddr32, CSR_PMPADDR32) +DECLARE_CSR(pmpaddr33, CSR_PMPADDR33) +DECLARE_CSR(pmpaddr34, CSR_PMPADDR34) +DECLARE_CSR(pmpaddr35, CSR_PMPADDR35) +DECLARE_CSR(pmpaddr36, CSR_PMPADDR36) +DECLARE_CSR(pmpaddr37, CSR_PMPADDR37) +DECLARE_CSR(pmpaddr38, CSR_PMPADDR38) +DECLARE_CSR(pmpaddr39, CSR_PMPADDR39) +DECLARE_CSR(pmpaddr40, CSR_PMPADDR40) 
+DECLARE_CSR(pmpaddr41, CSR_PMPADDR41) +DECLARE_CSR(pmpaddr42, CSR_PMPADDR42) +DECLARE_CSR(pmpaddr43, CSR_PMPADDR43) +DECLARE_CSR(pmpaddr44, CSR_PMPADDR44) +DECLARE_CSR(pmpaddr45, CSR_PMPADDR45) +DECLARE_CSR(pmpaddr46, CSR_PMPADDR46) +DECLARE_CSR(pmpaddr47, CSR_PMPADDR47) +DECLARE_CSR(pmpaddr48, CSR_PMPADDR48) +DECLARE_CSR(pmpaddr49, CSR_PMPADDR49) +DECLARE_CSR(pmpaddr50, CSR_PMPADDR50) +DECLARE_CSR(pmpaddr51, CSR_PMPADDR51) +DECLARE_CSR(pmpaddr52, CSR_PMPADDR52) +DECLARE_CSR(pmpaddr53, CSR_PMPADDR53) +DECLARE_CSR(pmpaddr54, CSR_PMPADDR54) +DECLARE_CSR(pmpaddr55, CSR_PMPADDR55) +DECLARE_CSR(pmpaddr56, CSR_PMPADDR56) +DECLARE_CSR(pmpaddr57, CSR_PMPADDR57) +DECLARE_CSR(pmpaddr58, CSR_PMPADDR58) +DECLARE_CSR(pmpaddr59, CSR_PMPADDR59) +DECLARE_CSR(pmpaddr60, CSR_PMPADDR60) +DECLARE_CSR(pmpaddr61, CSR_PMPADDR61) +DECLARE_CSR(pmpaddr62, CSR_PMPADDR62) +DECLARE_CSR(pmpaddr63, CSR_PMPADDR63) +DECLARE_CSR(mseccfg, CSR_MSECCFG) +DECLARE_CSR(tselect, CSR_TSELECT) +DECLARE_CSR(tdata1, CSR_TDATA1) +DECLARE_CSR(tdata2, CSR_TDATA2) +DECLARE_CSR(tdata3, CSR_TDATA3) +DECLARE_CSR(tinfo, CSR_TINFO) +DECLARE_CSR(tcontrol, CSR_TCONTROL) +DECLARE_CSR(mcontext, CSR_MCONTEXT) +DECLARE_CSR(mscontext, CSR_MSCONTEXT) +DECLARE_CSR(dcsr, CSR_DCSR) +DECLARE_CSR(dpc, CSR_DPC) +DECLARE_CSR(dscratch0, CSR_DSCRATCH0) +DECLARE_CSR(dscratch1, CSR_DSCRATCH1) +DECLARE_CSR(mcycle, CSR_MCYCLE) +DECLARE_CSR(minstret, CSR_MINSTRET) +DECLARE_CSR(mhpmcounter3, CSR_MHPMCOUNTER3) +DECLARE_CSR(mhpmcounter4, CSR_MHPMCOUNTER4) +DECLARE_CSR(mhpmcounter5, CSR_MHPMCOUNTER5) +DECLARE_CSR(mhpmcounter6, CSR_MHPMCOUNTER6) +DECLARE_CSR(mhpmcounter7, CSR_MHPMCOUNTER7) +DECLARE_CSR(mhpmcounter8, CSR_MHPMCOUNTER8) +DECLARE_CSR(mhpmcounter9, CSR_MHPMCOUNTER9) +DECLARE_CSR(mhpmcounter10, CSR_MHPMCOUNTER10) +DECLARE_CSR(mhpmcounter11, CSR_MHPMCOUNTER11) +DECLARE_CSR(mhpmcounter12, CSR_MHPMCOUNTER12) +DECLARE_CSR(mhpmcounter13, CSR_MHPMCOUNTER13) +DECLARE_CSR(mhpmcounter14, CSR_MHPMCOUNTER14) +DECLARE_CSR(mhpmcounter15, 
CSR_MHPMCOUNTER15) +DECLARE_CSR(mhpmcounter16, CSR_MHPMCOUNTER16) +DECLARE_CSR(mhpmcounter17, CSR_MHPMCOUNTER17) +DECLARE_CSR(mhpmcounter18, CSR_MHPMCOUNTER18) +DECLARE_CSR(mhpmcounter19, CSR_MHPMCOUNTER19) +DECLARE_CSR(mhpmcounter20, CSR_MHPMCOUNTER20) +DECLARE_CSR(mhpmcounter21, CSR_MHPMCOUNTER21) +DECLARE_CSR(mhpmcounter22, CSR_MHPMCOUNTER22) +DECLARE_CSR(mhpmcounter23, CSR_MHPMCOUNTER23) +DECLARE_CSR(mhpmcounter24, CSR_MHPMCOUNTER24) +DECLARE_CSR(mhpmcounter25, CSR_MHPMCOUNTER25) +DECLARE_CSR(mhpmcounter26, CSR_MHPMCOUNTER26) +DECLARE_CSR(mhpmcounter27, CSR_MHPMCOUNTER27) +DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) +DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) +DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) +DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) +DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) +DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) +DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) +DECLARE_CSR(mhpmevent6, CSR_MHPMEVENT6) +DECLARE_CSR(mhpmevent7, CSR_MHPMEVENT7) +DECLARE_CSR(mhpmevent8, CSR_MHPMEVENT8) +DECLARE_CSR(mhpmevent9, CSR_MHPMEVENT9) +DECLARE_CSR(mhpmevent10, CSR_MHPMEVENT10) +DECLARE_CSR(mhpmevent11, CSR_MHPMEVENT11) +DECLARE_CSR(mhpmevent12, CSR_MHPMEVENT12) +DECLARE_CSR(mhpmevent13, CSR_MHPMEVENT13) +DECLARE_CSR(mhpmevent14, CSR_MHPMEVENT14) +DECLARE_CSR(mhpmevent15, CSR_MHPMEVENT15) +DECLARE_CSR(mhpmevent16, CSR_MHPMEVENT16) +DECLARE_CSR(mhpmevent17, CSR_MHPMEVENT17) +DECLARE_CSR(mhpmevent18, CSR_MHPMEVENT18) +DECLARE_CSR(mhpmevent19, CSR_MHPMEVENT19) +DECLARE_CSR(mhpmevent20, CSR_MHPMEVENT20) +DECLARE_CSR(mhpmevent21, CSR_MHPMEVENT21) +DECLARE_CSR(mhpmevent22, CSR_MHPMEVENT22) +DECLARE_CSR(mhpmevent23, CSR_MHPMEVENT23) +DECLARE_CSR(mhpmevent24, CSR_MHPMEVENT24) +DECLARE_CSR(mhpmevent25, CSR_MHPMEVENT25) +DECLARE_CSR(mhpmevent26, CSR_MHPMEVENT26) +DECLARE_CSR(mhpmevent27, CSR_MHPMEVENT27) +DECLARE_CSR(mhpmevent28, CSR_MHPMEVENT28) +DECLARE_CSR(mhpmevent29, CSR_MHPMEVENT29) +DECLARE_CSR(mhpmevent30, CSR_MHPMEVENT30) +DECLARE_CSR(mhpmevent31, 
CSR_MHPMEVENT31) +DECLARE_CSR(mvendorid, CSR_MVENDORID) +DECLARE_CSR(marchid, CSR_MARCHID) +DECLARE_CSR(mimpid, CSR_MIMPID) +DECLARE_CSR(mhartid, CSR_MHARTID) +DECLARE_CSR(mconfigptr, CSR_MCONFIGPTR) +DECLARE_CSR(mtopi, CSR_MTOPI) +DECLARE_CSR(sieh, CSR_SIEH) +DECLARE_CSR(siph, CSR_SIPH) +DECLARE_CSR(stimecmph, CSR_STIMECMPH) +DECLARE_CSR(vsieh, CSR_VSIEH) +DECLARE_CSR(vsiph, CSR_VSIPH) +DECLARE_CSR(vstimecmph, CSR_VSTIMECMPH) +DECLARE_CSR(htimedeltah, CSR_HTIMEDELTAH) +DECLARE_CSR(hidelegh, CSR_HIDELEGH) +DECLARE_CSR(hvienh, CSR_HVIENH) +DECLARE_CSR(henvcfgh, CSR_HENVCFGH) +DECLARE_CSR(hviph, CSR_HVIPH) +DECLARE_CSR(hviprio1h, CSR_HVIPRIO1H) +DECLARE_CSR(hviprio2h, CSR_HVIPRIO2H) +DECLARE_CSR(hstateen0h, CSR_HSTATEEN0H) +DECLARE_CSR(hstateen1h, CSR_HSTATEEN1H) +DECLARE_CSR(hstateen2h, CSR_HSTATEEN2H) +DECLARE_CSR(hstateen3h, CSR_HSTATEEN3H) +DECLARE_CSR(cycleh, CSR_CYCLEH) +DECLARE_CSR(timeh, CSR_TIMEH) +DECLARE_CSR(instreth, CSR_INSTRETH) +DECLARE_CSR(hpmcounter3h, CSR_HPMCOUNTER3H) +DECLARE_CSR(hpmcounter4h, CSR_HPMCOUNTER4H) +DECLARE_CSR(hpmcounter5h, CSR_HPMCOUNTER5H) +DECLARE_CSR(hpmcounter6h, CSR_HPMCOUNTER6H) +DECLARE_CSR(hpmcounter7h, CSR_HPMCOUNTER7H) +DECLARE_CSR(hpmcounter8h, CSR_HPMCOUNTER8H) +DECLARE_CSR(hpmcounter9h, CSR_HPMCOUNTER9H) +DECLARE_CSR(hpmcounter10h, CSR_HPMCOUNTER10H) +DECLARE_CSR(hpmcounter11h, CSR_HPMCOUNTER11H) +DECLARE_CSR(hpmcounter12h, CSR_HPMCOUNTER12H) +DECLARE_CSR(hpmcounter13h, CSR_HPMCOUNTER13H) +DECLARE_CSR(hpmcounter14h, CSR_HPMCOUNTER14H) +DECLARE_CSR(hpmcounter15h, CSR_HPMCOUNTER15H) +DECLARE_CSR(hpmcounter16h, CSR_HPMCOUNTER16H) +DECLARE_CSR(hpmcounter17h, CSR_HPMCOUNTER17H) +DECLARE_CSR(hpmcounter18h, CSR_HPMCOUNTER18H) +DECLARE_CSR(hpmcounter19h, CSR_HPMCOUNTER19H) +DECLARE_CSR(hpmcounter20h, CSR_HPMCOUNTER20H) +DECLARE_CSR(hpmcounter21h, CSR_HPMCOUNTER21H) +DECLARE_CSR(hpmcounter22h, CSR_HPMCOUNTER22H) +DECLARE_CSR(hpmcounter23h, CSR_HPMCOUNTER23H) +DECLARE_CSR(hpmcounter24h, CSR_HPMCOUNTER24H) 
+DECLARE_CSR(hpmcounter25h, CSR_HPMCOUNTER25H) +DECLARE_CSR(hpmcounter26h, CSR_HPMCOUNTER26H) +DECLARE_CSR(hpmcounter27h, CSR_HPMCOUNTER27H) +DECLARE_CSR(hpmcounter28h, CSR_HPMCOUNTER28H) +DECLARE_CSR(hpmcounter29h, CSR_HPMCOUNTER29H) +DECLARE_CSR(hpmcounter30h, CSR_HPMCOUNTER30H) +DECLARE_CSR(hpmcounter31h, CSR_HPMCOUNTER31H) +DECLARE_CSR(mstatush, CSR_MSTATUSH) +DECLARE_CSR(midelegh, CSR_MIDELEGH) +DECLARE_CSR(mieh, CSR_MIEH) +DECLARE_CSR(mvienh, CSR_MVIENH) +DECLARE_CSR(mviph, CSR_MVIPH) +DECLARE_CSR(menvcfgh, CSR_MENVCFGH) +DECLARE_CSR(mstateen0h, CSR_MSTATEEN0H) +DECLARE_CSR(mstateen1h, CSR_MSTATEEN1H) +DECLARE_CSR(mstateen2h, CSR_MSTATEEN2H) +DECLARE_CSR(mstateen3h, CSR_MSTATEEN3H) +DECLARE_CSR(miph, CSR_MIPH) +DECLARE_CSR(mhpmevent3h, CSR_MHPMEVENT3H) +DECLARE_CSR(mhpmevent4h, CSR_MHPMEVENT4H) +DECLARE_CSR(mhpmevent5h, CSR_MHPMEVENT5H) +DECLARE_CSR(mhpmevent6h, CSR_MHPMEVENT6H) +DECLARE_CSR(mhpmevent7h, CSR_MHPMEVENT7H) +DECLARE_CSR(mhpmevent8h, CSR_MHPMEVENT8H) +DECLARE_CSR(mhpmevent9h, CSR_MHPMEVENT9H) +DECLARE_CSR(mhpmevent10h, CSR_MHPMEVENT10H) +DECLARE_CSR(mhpmevent11h, CSR_MHPMEVENT11H) +DECLARE_CSR(mhpmevent12h, CSR_MHPMEVENT12H) +DECLARE_CSR(mhpmevent13h, CSR_MHPMEVENT13H) +DECLARE_CSR(mhpmevent14h, CSR_MHPMEVENT14H) +DECLARE_CSR(mhpmevent15h, CSR_MHPMEVENT15H) +DECLARE_CSR(mhpmevent16h, CSR_MHPMEVENT16H) +DECLARE_CSR(mhpmevent17h, CSR_MHPMEVENT17H) +DECLARE_CSR(mhpmevent18h, CSR_MHPMEVENT18H) +DECLARE_CSR(mhpmevent19h, CSR_MHPMEVENT19H) +DECLARE_CSR(mhpmevent20h, CSR_MHPMEVENT20H) +DECLARE_CSR(mhpmevent21h, CSR_MHPMEVENT21H) +DECLARE_CSR(mhpmevent22h, CSR_MHPMEVENT22H) +DECLARE_CSR(mhpmevent23h, CSR_MHPMEVENT23H) +DECLARE_CSR(mhpmevent24h, CSR_MHPMEVENT24H) +DECLARE_CSR(mhpmevent25h, CSR_MHPMEVENT25H) +DECLARE_CSR(mhpmevent26h, CSR_MHPMEVENT26H) +DECLARE_CSR(mhpmevent27h, CSR_MHPMEVENT27H) +DECLARE_CSR(mhpmevent28h, CSR_MHPMEVENT28H) +DECLARE_CSR(mhpmevent29h, CSR_MHPMEVENT29H) +DECLARE_CSR(mhpmevent30h, CSR_MHPMEVENT30H) +DECLARE_CSR(mhpmevent31h, 
CSR_MHPMEVENT31H) +DECLARE_CSR(mnscratch, CSR_MNSCRATCH) +DECLARE_CSR(mnepc, CSR_MNEPC) +DECLARE_CSR(mncause, CSR_MNCAUSE) +DECLARE_CSR(mnstatus, CSR_MNSTATUS) +DECLARE_CSR(mseccfgh, CSR_MSECCFGH) +DECLARE_CSR(mcycleh, CSR_MCYCLEH) +DECLARE_CSR(minstreth, CSR_MINSTRETH) +DECLARE_CSR(mhpmcounter3h, CSR_MHPMCOUNTER3H) +DECLARE_CSR(mhpmcounter4h, CSR_MHPMCOUNTER4H) +DECLARE_CSR(mhpmcounter5h, CSR_MHPMCOUNTER5H) +DECLARE_CSR(mhpmcounter6h, CSR_MHPMCOUNTER6H) +DECLARE_CSR(mhpmcounter7h, CSR_MHPMCOUNTER7H) +DECLARE_CSR(mhpmcounter8h, CSR_MHPMCOUNTER8H) +DECLARE_CSR(mhpmcounter9h, CSR_MHPMCOUNTER9H) +DECLARE_CSR(mhpmcounter10h, CSR_MHPMCOUNTER10H) +DECLARE_CSR(mhpmcounter11h, CSR_MHPMCOUNTER11H) +DECLARE_CSR(mhpmcounter12h, CSR_MHPMCOUNTER12H) +DECLARE_CSR(mhpmcounter13h, CSR_MHPMCOUNTER13H) +DECLARE_CSR(mhpmcounter14h, CSR_MHPMCOUNTER14H) +DECLARE_CSR(mhpmcounter15h, CSR_MHPMCOUNTER15H) +DECLARE_CSR(mhpmcounter16h, CSR_MHPMCOUNTER16H) +DECLARE_CSR(mhpmcounter17h, CSR_MHPMCOUNTER17H) +DECLARE_CSR(mhpmcounter18h, CSR_MHPMCOUNTER18H) +DECLARE_CSR(mhpmcounter19h, CSR_MHPMCOUNTER19H) +DECLARE_CSR(mhpmcounter20h, CSR_MHPMCOUNTER20H) +DECLARE_CSR(mhpmcounter21h, CSR_MHPMCOUNTER21H) +DECLARE_CSR(mhpmcounter22h, CSR_MHPMCOUNTER22H) +DECLARE_CSR(mhpmcounter23h, CSR_MHPMCOUNTER23H) +DECLARE_CSR(mhpmcounter24h, CSR_MHPMCOUNTER24H) +DECLARE_CSR(mhpmcounter25h, CSR_MHPMCOUNTER25H) +DECLARE_CSR(mhpmcounter26h, CSR_MHPMCOUNTER26H) +DECLARE_CSR(mhpmcounter27h, CSR_MHPMCOUNTER27H) +DECLARE_CSR(mhpmcounter28h, CSR_MHPMCOUNTER28H) +DECLARE_CSR(mhpmcounter29h, CSR_MHPMCOUNTER29H) +DECLARE_CSR(mhpmcounter30h, CSR_MHPMCOUNTER30H) +DECLARE_CSR(mhpmcounter31h, CSR_MHPMCOUNTER31H) +#endif +#ifdef DECLARE_CAUSE +DECLARE_CAUSE("misaligned fetch", CAUSE_MISALIGNED_FETCH) +DECLARE_CAUSE("fetch access", CAUSE_FETCH_ACCESS) +DECLARE_CAUSE("illegal instruction", CAUSE_ILLEGAL_INSTRUCTION) +DECLARE_CAUSE("breakpoint", CAUSE_BREAKPOINT) +DECLARE_CAUSE("misaligned load", CAUSE_MISALIGNED_LOAD) 
+DECLARE_CAUSE("load access", CAUSE_LOAD_ACCESS) +DECLARE_CAUSE("misaligned store", CAUSE_MISALIGNED_STORE) +DECLARE_CAUSE("store access", CAUSE_STORE_ACCESS) +DECLARE_CAUSE("user_ecall", CAUSE_USER_ECALL) +DECLARE_CAUSE("supervisor_ecall", CAUSE_SUPERVISOR_ECALL) +DECLARE_CAUSE("virtual_supervisor_ecall", CAUSE_VIRTUAL_SUPERVISOR_ECALL) +DECLARE_CAUSE("machine_ecall", CAUSE_MACHINE_ECALL) +DECLARE_CAUSE("fetch page fault", CAUSE_FETCH_PAGE_FAULT) +DECLARE_CAUSE("load page fault", CAUSE_LOAD_PAGE_FAULT) +DECLARE_CAUSE("store page fault", CAUSE_STORE_PAGE_FAULT) +DECLARE_CAUSE("fetch guest page fault", CAUSE_FETCH_GUEST_PAGE_FAULT) +DECLARE_CAUSE("load guest page fault", CAUSE_LOAD_GUEST_PAGE_FAULT) +DECLARE_CAUSE("virtual instruction", CAUSE_VIRTUAL_INSTRUCTION) +DECLARE_CAUSE("store guest page fault", CAUSE_STORE_GUEST_PAGE_FAULT) +#endif diff --git a/tests/riscv-test-env/p/link.ld b/tests/riscv-test-env/p/link.ld new file mode 100644 index 000000000..b3e315e78 --- /dev/null +++ b/tests/riscv-test-env/p/link.ld @@ -0,0 +1,17 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +SECTIONS +{ + . = 0x80000000; + .text.init : { *(.text.init) } + . = ALIGN(0x1000); + .tohost : { *(.tohost) } + . = ALIGN(0x1000); + .text : { *(.text) } + . = ALIGN(0x1000); + .data : { *(.data) } + .bss : { *(.bss) } + _end = .; +} + diff --git a/tests/riscv-test-env/p/riscv_test.h b/tests/riscv-test-env/p/riscv_test.h new file mode 100644 index 000000000..e8f4de339 --- /dev/null +++ b/tests/riscv-test-env/p/riscv_test.h @@ -0,0 +1,282 @@ +// See LICENSE for license details. 
+ +#ifndef _ENV_PHYSICAL_SINGLE_CORE_H +#define _ENV_PHYSICAL_SINGLE_CORE_H + +#include "../encoding.h" + +//----------------------------------------------------------------------- +// Begin Macro +//----------------------------------------------------------------------- + +#define RVTEST_RV64U \ + .macro init; \ + .endm + +#define RVTEST_RV64UF \ + .macro init; \ + RVTEST_FP_ENABLE; \ + .endm + +#define RVTEST_RV64UV \ + .macro init; \ + RVTEST_VECTOR_ENABLE; \ + .endm + +#define RVTEST_RV32U \ + .macro init; \ + .endm + +#define RVTEST_RV32UF \ + .macro init; \ + RVTEST_FP_ENABLE; \ + .endm + +#define RVTEST_RV32UV \ + .macro init; \ + RVTEST_VECTOR_ENABLE; \ + .endm + +#define RVTEST_RV64M \ + .macro init; \ + RVTEST_ENABLE_MACHINE; \ + .endm + +#define RVTEST_RV64S \ + .macro init; \ + RVTEST_ENABLE_SUPERVISOR; \ + .endm + +#define RVTEST_RV32M \ + .macro init; \ + RVTEST_ENABLE_MACHINE; \ + .endm + +#define RVTEST_RV32S \ + .macro init; \ + RVTEST_ENABLE_SUPERVISOR; \ + .endm + +#if __riscv_xlen == 64 +# define CHECK_XLEN li a0, 1; slli a0, a0, 31; bgez a0, 1f; RVTEST_PASS; 1: +#else +# define CHECK_XLEN li a0, 1; slli a0, a0, 31; bltz a0, 1f; RVTEST_PASS; 1: +#endif + +#define INIT_XREG \ + li x1, 0; \ + li x2, 0; \ + li x3, 0; \ + li x4, 0; \ + li x5, 0; \ + li x6, 0; \ + li x7, 0; \ + li x8, 0; \ + li x9, 0; \ + li x10, 0; \ + li x11, 0; \ + li x12, 0; \ + li x13, 0; \ + li x14, 0; \ + li x15, 0; \ + li x16, 0; \ + li x17, 0; \ + li x18, 0; \ + li x19, 0; \ + li x20, 0; \ + li x21, 0; \ + li x22, 0; \ + li x23, 0; \ + li x24, 0; \ + li x25, 0; \ + li x26, 0; \ + li x27, 0; \ + li x28, 0; \ + li x29, 0; \ + li x30, 0; \ + li x31, 0; + +#define INIT_PMP \ + la t0, 1f; \ + csrw mtvec, t0; \ + /* Set up a PMP to permit all accesses */ \ + li t0, (1 << (31 + (__riscv_xlen / 64) * (53 - 31))) - 1; \ + csrw pmpaddr0, t0; \ + li t0, PMP_NAPOT | PMP_R | PMP_W | PMP_X; \ + csrw pmpcfg0, t0; \ + .align 2; \ +1: + +#define INIT_RNMI \ + la t0, 1f; \ + csrw mtvec, t0; \ 
+ csrwi CSR_MNSTATUS, MNSTATUS_NMIE; \ + .align 2; \ +1: + +#define INIT_SATP \ + la t0, 1f; \ + csrw mtvec, t0; \ + csrwi satp, 0; \ + .align 2; \ +1: + +#define DELEGATE_NO_TRAPS \ + csrwi mie, 0; \ + la t0, 1f; \ + csrw mtvec, t0; \ + csrwi medeleg, 0; \ + csrwi mideleg, 0; \ + .align 2; \ +1: + +#define RVTEST_ENABLE_SUPERVISOR \ + li a0, MSTATUS_MPP & (MSTATUS_MPP >> 1); \ + csrs mstatus, a0; \ + li a0, SIP_SSIP | SIP_STIP; \ + csrs mideleg, a0; \ + +#define RVTEST_ENABLE_MACHINE \ + li a0, MSTATUS_MPP; \ + csrs mstatus, a0; \ + +#define RVTEST_FP_ENABLE \ + li a0, MSTATUS_FS & (MSTATUS_FS >> 1); \ + csrs mstatus, a0; \ + csrwi fcsr, 0 + +#define RVTEST_VECTOR_ENABLE \ + li a0, (MSTATUS_VS & (MSTATUS_VS >> 1)) | \ + (MSTATUS_FS & (MSTATUS_FS >> 1)); \ + csrs mstatus, a0; \ + csrwi fcsr, 0; \ + csrwi vcsr, 0; + +#define RISCV_MULTICORE_DISABLE \ + csrr a0, mhartid; \ + 1: bnez a0, 1b + +#define EXTRA_TVEC_USER +#define EXTRA_TVEC_MACHINE +#define EXTRA_INIT +#define EXTRA_INIT_TIMER +#define FILTER_TRAP +#define FILTER_PAGE_FAULT + +#define INTERRUPT_HANDLER j other_exception /* No interrupts should occur */ + +#define RVTEST_CODE_BEGIN \ + .section .text.init; \ + .align 6; \ + .weak stvec_handler; \ + .weak mtvec_handler; \ + .globl _start; \ +_start: \ + /* reset vector */ \ + j reset_vector; \ + .align 2; \ +trap_vector: \ + /* test whether the test came from pass/fail */ \ + csrr t5, mcause; \ + li t6, CAUSE_USER_ECALL; \ + beq t5, t6, write_tohost; \ + li t6, CAUSE_SUPERVISOR_ECALL; \ + beq t5, t6, write_tohost; \ + li t6, CAUSE_MACHINE_ECALL; \ + beq t5, t6, write_tohost; \ + /* if an mtvec_handler is defined, jump to it */ \ + la t5, mtvec_handler; \ + beqz t5, 1f; \ + jr t5; \ + /* was it an interrupt or an exception? 
*/ \ + 1: csrr t5, mcause; \ + bgez t5, handle_exception; \ + INTERRUPT_HANDLER; \ +handle_exception: \ + /* we don't know how to handle whatever the exception was */ \ + other_exception: \ + /* some unhandlable exception occurred */ \ + 1: ori TESTNUM, TESTNUM, 1337; \ + write_tohost: \ + sw TESTNUM, tohost, t5; \ + sw zero, tohost + 4, t5; \ + j write_tohost; \ +reset_vector: \ + INIT_XREG; \ + RISCV_MULTICORE_DISABLE; \ + INIT_RNMI; \ + INIT_SATP; \ + INIT_PMP; \ + DELEGATE_NO_TRAPS; \ + li TESTNUM, 0; \ + la t0, trap_vector; \ + csrw mtvec, t0; \ + CHECK_XLEN; \ + /* if an stvec_handler is defined, delegate exceptions to it */ \ + la t0, stvec_handler; \ + beqz t0, 1f; \ + csrw stvec, t0; \ + li t0, (1 << CAUSE_LOAD_PAGE_FAULT) | \ + (1 << CAUSE_STORE_PAGE_FAULT) | \ + (1 << CAUSE_FETCH_PAGE_FAULT) | \ + (1 << CAUSE_MISALIGNED_FETCH) | \ + (1 << CAUSE_USER_ECALL) | \ + (1 << CAUSE_BREAKPOINT); \ + csrw medeleg, t0; \ +1: csrwi mstatus, 0; \ + init; \ + EXTRA_INIT; \ + EXTRA_INIT_TIMER; \ + la t0, 1f; \ + csrw mepc, t0; \ + csrr a0, mhartid; \ + mret; \ +1: + +//----------------------------------------------------------------------- +// End Macro +//----------------------------------------------------------------------- + +#define EXIT_POS 0x40000000; +#define EXIT_CODE 0xdeadbeef; +#define RVTEST_CODE_END \ + li x1, EXIT_POS; \ + li x2, EXIT_CODE; \ + sw x2, 0(x1); + +//----------------------------------------------------------------------- +// Pass/Fail Macro +//----------------------------------------------------------------------- + +#define RVTEST_PASS \ + fence; \ + li TESTNUM, 1; \ + li a7, 93; \ + li a0, 0; \ + +#define TESTNUM gp +#define RVTEST_FAIL \ + fence; \ +1: beqz TESTNUM, 1b; \ + sll TESTNUM, TESTNUM, 1; \ + or TESTNUM, TESTNUM, 1; \ + li a7, 93; \ + addi a0, TESTNUM, 0; \ + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- 
+ +#define EXTRA_DATA + +#define RVTEST_DATA_BEGIN \ + EXTRA_DATA \ + .pushsection .tohost,"aw",@progbits; \ + .align 6; .global tohost; tohost: .dword 0; .size tohost, 8; \ + .align 6; .global fromhost; fromhost: .dword 0; .size fromhost, 8;\ + .popsection; \ + .align 4; .global begin_signature; begin_signature: + +#define RVTEST_DATA_END .align 4; .global end_signature; end_signature: + +#endif diff --git a/tests/riscv-test-env/pm/link.ld b/tests/riscv-test-env/pm/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/pm/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/pm/riscv_test.h b/tests/riscv-test-env/pm/riscv_test.h new file mode 100644 index 000000000..38a0e86b8 --- /dev/null +++ b/tests/riscv-test-env/pm/riscv_test.h @@ -0,0 +1,11 @@ +// See LICENSE for license details. + +#ifndef _ENV_PHYSICAL_MULTI_CORE_H +#define _ENV_PHYSICAL_MULTI_CORE_H + +#include "../p/riscv_test.h" + +#undef RISCV_MULTICORE_DISABLE +#define RISCV_MULTICORE_DISABLE + +#endif diff --git a/tests/riscv-test-env/pt/link.ld b/tests/riscv-test-env/pt/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/pt/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/pt/riscv_test.h b/tests/riscv-test-env/pt/riscv_test.h new file mode 100644 index 000000000..34c2a331a --- /dev/null +++ b/tests/riscv-test-env/pt/riscv_test.h @@ -0,0 +1,69 @@ +// See LICENSE for license details. 
+ +#ifndef _ENV_PHYSICAL_SINGLE_CORE_TIMER_H +#define _ENV_PHYSICAL_SINGLE_CORE_TIMER_H + +#include "../p/riscv_test.h" + +#define TIMER_INTERVAL 2 + +#undef EXTRA_INIT_TIMER +#define EXTRA_INIT_TIMER \ + li a0, MIP_MTIP; \ + csrs mie, a0; \ + csrr a0, mtime; \ + addi a0, a0, TIMER_INTERVAL; \ + csrw mtimecmp, a0; \ + +#if SSTATUS_XS != 0x18000 +# error +#endif +#define XS_SHIFT 15 + +#undef INTERRUPT_HANDLER +#define INTERRUPT_HANDLER \ + slli t5, t5, 1; \ + srli t5, t5, 1; \ + add t5, t5, -IRQ_M_TIMER; \ + bnez t5, other_exception; /* other interrups shouldn't happen */\ + csrr t5, mtime; \ + addi t5, t5, TIMER_INTERVAL; \ + csrw mtimecmp, t5; \ + mret; \ + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- + +#undef EXTRA_DATA +#define EXTRA_DATA \ + .align 3; \ +regspill: \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ + .dword 0xdeadbeefcafebabe; \ +evac: \ + .skip 32768; \ + +#endif diff --git a/tests/riscv-test-env/v/entry.S b/tests/riscv-test-env/v/entry.S new file mode 100644 index 000000000..13d46a349 --- /dev/null +++ b/tests/riscv-test-env/v/entry.S @@ -0,0 +1,164 @@ +#include "riscv_test.h" + +#if __riscv_xlen == 64 +# define STORE sd +# define LOAD ld +# define REGBYTES 8 
+#else +# define STORE sw +# define LOAD lw +# define REGBYTES 4 +#endif + +#define STACK_TOP (_end + RISCV_PGSIZE * 4) + + .section ".text.init","ax",@progbits + .globl _start + .align 2 +_start: + j handle_reset + + /* NMI vector */ + .align 2 +nmi_vector: + j wtf + + .align 2 +trap_vector: + j wtf + +handle_reset: + li x1, 0 + li x2, 0 + li x3, 0 + li x4, 0 + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10, 0 + li x11, 0 + li x12, 0 + li x13, 0 + li x14, 0 + li x15, 0 + li x16, 0 + li x17, 0 + li x18, 0 + li x19, 0 + li x20, 0 + li x21, 0 + li x22, 0 + li x23, 0 + li x24, 0 + li x25, 0 + li x26, 0 + li x27, 0 + li x28, 0 + li x29, 0 + li x30, 0 + li x31, 0 + + INIT_RNMI + + la t0, trap_vector + csrw mtvec, t0 + la sp, STACK_TOP - SIZEOF_TRAPFRAME_T + csrr t0, mhartid + slli t0, t0, 12 + add sp, sp, t0 + csrw mscratch, sp + call extra_boot + la a0, userstart + j vm_boot + + .globl pop_tf +pop_tf: + LOAD t0,33*REGBYTES(a0) + csrw sepc,t0 + LOAD x1,1*REGBYTES(a0) + LOAD x2,2*REGBYTES(a0) + LOAD x3,3*REGBYTES(a0) + LOAD x4,4*REGBYTES(a0) + LOAD x5,5*REGBYTES(a0) + LOAD x6,6*REGBYTES(a0) + LOAD x7,7*REGBYTES(a0) + LOAD x8,8*REGBYTES(a0) + LOAD x9,9*REGBYTES(a0) + LOAD x11,11*REGBYTES(a0) + LOAD x12,12*REGBYTES(a0) + LOAD x13,13*REGBYTES(a0) + LOAD x14,14*REGBYTES(a0) + LOAD x15,15*REGBYTES(a0) + LOAD x16,16*REGBYTES(a0) + LOAD x17,17*REGBYTES(a0) + LOAD x18,18*REGBYTES(a0) + LOAD x19,19*REGBYTES(a0) + LOAD x20,20*REGBYTES(a0) + LOAD x21,21*REGBYTES(a0) + LOAD x22,22*REGBYTES(a0) + LOAD x23,23*REGBYTES(a0) + LOAD x24,24*REGBYTES(a0) + LOAD x25,25*REGBYTES(a0) + LOAD x26,26*REGBYTES(a0) + LOAD x27,27*REGBYTES(a0) + LOAD x28,28*REGBYTES(a0) + LOAD x29,29*REGBYTES(a0) + LOAD x30,30*REGBYTES(a0) + LOAD x31,31*REGBYTES(a0) + LOAD a0,10*REGBYTES(a0) + sret + + .global trap_entry + .align 2 +trap_entry: + csrrw sp, sscratch, sp + + # save gprs + STORE x1,1*REGBYTES(sp) + STORE x3,3*REGBYTES(sp) + STORE x4,4*REGBYTES(sp) + STORE x5,5*REGBYTES(sp) + STORE 
x6,6*REGBYTES(sp) + STORE x7,7*REGBYTES(sp) + STORE x8,8*REGBYTES(sp) + STORE x9,9*REGBYTES(sp) + STORE x10,10*REGBYTES(sp) + STORE x11,11*REGBYTES(sp) + STORE x12,12*REGBYTES(sp) + STORE x13,13*REGBYTES(sp) + STORE x14,14*REGBYTES(sp) + STORE x15,15*REGBYTES(sp) + STORE x16,16*REGBYTES(sp) + STORE x17,17*REGBYTES(sp) + STORE x18,18*REGBYTES(sp) + STORE x19,19*REGBYTES(sp) + STORE x20,20*REGBYTES(sp) + STORE x21,21*REGBYTES(sp) + STORE x22,22*REGBYTES(sp) + STORE x23,23*REGBYTES(sp) + STORE x24,24*REGBYTES(sp) + STORE x25,25*REGBYTES(sp) + STORE x26,26*REGBYTES(sp) + STORE x27,27*REGBYTES(sp) + STORE x28,28*REGBYTES(sp) + STORE x29,29*REGBYTES(sp) + STORE x30,30*REGBYTES(sp) + STORE x31,31*REGBYTES(sp) + + csrrw t0,sscratch,sp + STORE t0,2*REGBYTES(sp) + + # get sr, epc, badvaddr, cause + csrr t0,sstatus + STORE t0,32*REGBYTES(sp) + csrr t0,sepc + STORE t0,33*REGBYTES(sp) + csrr t0,stval + STORE t0,34*REGBYTES(sp) + csrr t0,scause + STORE t0,35*REGBYTES(sp) + + move a0, sp + j handle_trap diff --git a/tests/riscv-test-env/v/link.ld b/tests/riscv-test-env/v/link.ld new file mode 120000 index 000000000..86b45f9f6 --- /dev/null +++ b/tests/riscv-test-env/v/link.ld @@ -0,0 +1 @@ +../p/link.ld \ No newline at end of file diff --git a/tests/riscv-test-env/v/riscv_test.h b/tests/riscv-test-env/v/riscv_test.h new file mode 100644 index 000000000..f56c0228c --- /dev/null +++ b/tests/riscv-test-env/v/riscv_test.h @@ -0,0 +1,94 @@ +// See LICENSE for license details. 
+ +#ifndef _ENV_VIRTUAL_SINGLE_CORE_H +#define _ENV_VIRTUAL_SINGLE_CORE_H + +#include "../p/riscv_test.h" + +//----------------------------------------------------------------------- +// Begin Macro +//----------------------------------------------------------------------- + +#undef RVTEST_FP_ENABLE +#define RVTEST_FP_ENABLE fssr x0 + +#undef RVTEST_VECTOR_ENABLE +#define RVTEST_VECTOR_ENABLE \ + csrwi fcsr, 0; \ + csrwi vcsr, 0; + +#undef RVTEST_CODE_BEGIN +#define RVTEST_CODE_BEGIN \ + .text; \ + .global extra_boot; \ +extra_boot: \ + EXTRA_INIT \ + ret; \ +.global trap_filter; \ +trap_filter: \ + FILTER_TRAP \ + li a0, 0; \ + ret; \ +.global pf_filter; \ +pf_filter: \ + FILTER_PAGE_FAULT \ + li a0, 0; \ + ret; \ + .global userstart; \ +userstart: \ + init + +//----------------------------------------------------------------------- +// Pass/Fail Macro +//----------------------------------------------------------------------- + +#undef RVTEST_PASS +#define RVTEST_PASS li a0, 1; scall + +#undef RVTEST_FAIL +#define RVTEST_FAIL sll a0, TESTNUM, 1; 1:beqz a0, 1b; or a0, a0, 1; scall; + +//----------------------------------------------------------------------- +// Data Section Macro +//----------------------------------------------------------------------- + +#undef RVTEST_DATA_END +#define RVTEST_DATA_END + +//----------------------------------------------------------------------- +// Supervisor mode definitions and macros +//----------------------------------------------------------------------- + +#ifndef LFSR_BITS +#define LFSR_BITS 6 +#endif + +#define MAX_TEST_PAGES ((1 << LFSR_BITS)-1) // this must be the period of the LFSR below +#define LFSR_NEXT(x) (((((x)^((x)>>1)) & 1) << (LFSR_BITS-1)) | ((x) >> 1)) + +#define PGSHIFT 12 +#define PGSIZE (1UL << PGSHIFT) + +#define SIZEOF_TRAPFRAME_T ((__riscv_xlen / 8) * 36) + +#ifndef __ASSEMBLER__ + +typedef unsigned long pte_t; +#define LEVELS (sizeof(pte_t) == sizeof(uint64_t) ? 
3 : 2) +#define PTIDXBITS (PGSHIFT - (sizeof(pte_t) == 8 ? 3 : 2)) +#define VPN_BITS (PTIDXBITS * LEVELS) +#define VA_BITS (VPN_BITS + PGSHIFT) +#define PTES_PER_PT (1UL << RISCV_PGLEVEL_BITS) +#define MEGAPAGE_SIZE (PTES_PER_PT * PGSIZE) + +typedef struct +{ + long gpr[32]; + long sr; + long epc; + long badvaddr; + long cause; +} trapframe_t; +#endif + +#endif diff --git a/tests/riscv-test-env/v/string.c b/tests/riscv-test-env/v/string.c new file mode 100644 index 000000000..4ffedc0a4 --- /dev/null +++ b/tests/riscv-test-env/v/string.c @@ -0,0 +1,114 @@ +#include +#include +#include + +void* memcpy(void* dest, const void* src, size_t len) +{ + if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) { + const uintptr_t* s = src; + uintptr_t *d = dest; + while (d < (uintptr_t*)(dest + len)) + *d++ = *s++; + } else { + const char* s = src; + char *d = dest; + while (d < (char*)(dest + len)) + *d++ = *s++; + } + return dest; +} + +void* memset(void* dest, int byte, size_t len) +{ + if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) { + uintptr_t word = byte & 0xFF; + word |= word << 8; + word |= word << 16; + word |= word << 16 << 16; + + uintptr_t *d = dest; + while (d < (uintptr_t*)(dest + len)) + *d++ = word; + } else { + char *d = dest; + while (d < (char*)(dest + len)) + *d++ = byte; + } + return dest; +} + +size_t strlen(const char *s) +{ + const char *p = s; + while (*p) + p++; + return p - s; +} + +int strcmp(const char* s1, const char* s2) +{ + unsigned char c1, c2; + + do { + c1 = *s1++; + c2 = *s2++; + } while (c1 != 0 && c1 == c2); + + return c1 - c2; +} + +int memcmp(const void* s1, const void* s2, size_t n) +{ + if ((((uintptr_t)s1 | (uintptr_t)s2) & (sizeof(uintptr_t)-1)) == 0) { + const uintptr_t* u1 = s1; + const uintptr_t* u2 = s2; + const uintptr_t* end = u1 + (n / sizeof(uintptr_t)); + while (u1 < end) { + if (*u1 != *u2) + break; + u1++; + u2++; + } + n -= (const void*)u1 - s1; + s1 = u1; + s2 = u2; + } + + while 
(n--) { + unsigned char c1 = *(const unsigned char*)s1++; + unsigned char c2 = *(const unsigned char*)s2++; + if (c1 != c2) + return c1 - c2; + } + + return 0; +} + +char* strcpy(char* dest, const char* src) +{ + char* d = dest; + while ((*d++ = *src++)) + ; + return dest; +} + +long atol(const char* str) +{ + long res = 0; + int sign = 0; + + while (*str == ' ') + str++; + + if (*str == '-' || *str == '+') { + sign = *str == '-'; + str++; + } + + while (*str) { + res *= 10; + res += *str++ - '0'; + } + + return sign ? -res : res; +} diff --git a/tests/riscv-test-env/v/vm.c b/tests/riscv-test-env/v/vm.c new file mode 100644 index 000000000..178d90ba3 --- /dev/null +++ b/tests/riscv-test-env/v/vm.c @@ -0,0 +1,315 @@ +// See LICENSE for license details. + +#include +#include +#include + +#include "riscv_test.h" + +#if __riscv_xlen == 32 +# define SATP_MODE_CHOICE SATP_MODE_SV32 +#elif defined(Sv48) +# define SATP_MODE_CHOICE SATP_MODE_SV48 +#else +# define SATP_MODE_CHOICE SATP_MODE_SV39 +#endif + +void trap_entry(); +void pop_tf(trapframe_t*); + +extern volatile uint64_t tohost; +extern volatile uint64_t fromhost; + +static void do_tohost(uint64_t tohost_value) +{ + while (tohost) + fromhost = 0; + tohost = tohost_value; +} + +#define pa2kva(pa) ((void*)(pa) - DRAM_BASE - MEGAPAGE_SIZE) +#define uva2kva(pa) ((void*)(pa) - MEGAPAGE_SIZE) + +#define flush_page(addr) asm volatile ("sfence.vma %0" : : "r" (addr) : "memory") + +static uint64_t lfsr63(uint64_t x) +{ + uint64_t bit = (x ^ (x >> 1)) & 1; + return (x >> 1) | (bit << 62); +} + +static void cputchar(int x) +{ + do_tohost(0x0101000000000000 | (unsigned char)x); +} + +static void cputstring(const char* s) +{ + while (*s) + cputchar(*s++); +} + +static void terminate(int code) +{ + do_tohost(code); + while (1); +} + +void wtf() +{ + terminate(841); +} + +#define stringify1(x) #x +#define stringify(x) stringify1(x) +#define assert(x) do { \ + if (x) break; \ + cputstring("Assertion failed: " stringify(x) "\n"); \ 
+ terminate(3); \ +} while(0) + +#define l1pt pt[0] +#define user_l2pt pt[1] +#if SATP_MODE_CHOICE == SATP_MODE_SV48 +# define NPT 6 +# define kernel_l2pt pt[2] +# define kernel_l3pt pt[3] +# define user_l3pt pt[4] +# define user_llpt pt[5] +#elif SATP_MODE_CHOICE == SATP_MODE_SV39 +# define NPT 4 +# define kernel_l2pt pt[2] +# define user_llpt pt[3] +#elif SATP_MODE_CHOICE == SATP_MODE_SV32 +# define NPT 2 +# define user_llpt user_l2pt +#else +# error Unknown SATP_MODE_CHOICE +#endif +pte_t pt[NPT][PTES_PER_PT] __attribute__((aligned(PGSIZE))); + +typedef struct { pte_t addr; void* next; } freelist_t; + +freelist_t user_mapping[MAX_TEST_PAGES]; +freelist_t freelist_nodes[MAX_TEST_PAGES]; +freelist_t *freelist_head, *freelist_tail; + +void printhex(uint64_t x) +{ + char str[17]; + for (int i = 0; i < 16; i++) + { + str[15-i] = (x & 0xF) + ((x & 0xF) < 10 ? '0' : 'a'-10); + x >>= 4; + } + str[16] = 0; + + cputstring(str); +} + +static void evict(unsigned long addr) +{ + assert(addr >= PGSIZE && addr < MAX_TEST_PAGES * PGSIZE); + addr = addr/PGSIZE*PGSIZE; + + freelist_t* node = &user_mapping[addr/PGSIZE]; + if (node->addr) + { + // check accessed and dirty bits + assert(user_llpt[addr/PGSIZE] & PTE_A); + uintptr_t sstatus = set_csr(sstatus, SSTATUS_SUM); + if (memcmp((void*)addr, uva2kva(addr), PGSIZE)) { + assert(user_llpt[addr/PGSIZE] & PTE_D); + memcpy(uva2kva(addr), (void*)addr, PGSIZE); + } + write_csr(sstatus, sstatus); + + user_mapping[addr/PGSIZE].addr = 0; + + if (freelist_tail == 0) + freelist_head = freelist_tail = node; + else + { + freelist_tail->next = node; + freelist_tail = node; + } + } +} + +extern int pf_filter(uintptr_t addr, uintptr_t *pte, int *copy); +extern int trap_filter(trapframe_t *tf); + +void handle_fault(uintptr_t addr, uintptr_t cause) +{ + uintptr_t filter_encodings = 0; + int copy_page = 1; + + assert(addr >= PGSIZE && addr < MAX_TEST_PAGES * PGSIZE); + addr = addr/PGSIZE*PGSIZE; + + if (user_llpt[addr/PGSIZE]) { + if 
(!(user_llpt[addr/PGSIZE] & PTE_A)) { + user_llpt[addr/PGSIZE] |= PTE_A; + } else { + assert(!(user_llpt[addr/PGSIZE] & PTE_D) && cause == CAUSE_STORE_PAGE_FAULT); + user_llpt[addr/PGSIZE] |= PTE_D; + } + flush_page(addr); + return; + } + + freelist_t* node = freelist_head; + assert(node); + freelist_head = node->next; + if (freelist_head == freelist_tail) + freelist_tail = 0; + + uintptr_t new_pte = (node->addr >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V | PTE_U | PTE_R | PTE_W | PTE_X; + + if (pf_filter(addr, &filter_encodings, ©_page)) { + new_pte = (node->addr >> PGSHIFT << PTE_PPN_SHIFT) | filter_encodings; + } + + user_llpt[addr/PGSIZE] = new_pte | PTE_A | PTE_D; + flush_page(addr); + + assert(user_mapping[addr/PGSIZE].addr == 0); + user_mapping[addr/PGSIZE] = *node; + + uintptr_t sstatus = set_csr(sstatus, SSTATUS_SUM); + memcpy((void*)addr, uva2kva(addr), PGSIZE); + write_csr(sstatus, sstatus); + + user_llpt[addr/PGSIZE] = new_pte; + flush_page(addr); + + asm volatile ("fence.i"); +} + +void handle_trap(trapframe_t* tf) +{ + if (trap_filter(tf)) { + pop_tf(tf); + } + + if (tf->cause == CAUSE_USER_ECALL) + { + int n = tf->gpr[10]; + + for (long i = 1; i < MAX_TEST_PAGES; i++) + evict(i*PGSIZE); + + terminate(n); + } + else if (tf->cause == CAUSE_ILLEGAL_INSTRUCTION) + { + assert(tf->epc % 4 == 0); + + int* fssr; + asm ("jal %0, 1f; fssr x0; 1:" : "=r"(fssr)); + + if (*(int*)tf->epc == *fssr) + terminate(1); // FP test on non-FP hardware. "succeed." 
+ else + assert(!"illegal instruction"); + tf->epc += 4; + } + else if (tf->cause == CAUSE_FETCH_PAGE_FAULT || tf->cause == CAUSE_LOAD_PAGE_FAULT || tf->cause == CAUSE_STORE_PAGE_FAULT) + handle_fault(tf->badvaddr, tf->cause); + else + assert(!"unexpected exception"); + + pop_tf(tf); +} + +static void coherence_torture() +{ + // cause coherence misses without affecting program semantics + uint64_t random = ENTROPY; + while (1) { + uintptr_t paddr = DRAM_BASE + ((random % (2 * (MAX_TEST_PAGES + 1) * PGSIZE)) & -4); +#ifdef __riscv_atomic + if (random & 1) // perform a no-op write + asm volatile ("amoadd.w zero, zero, (%0)" :: "r"(paddr)); + else // perform a read +#endif + asm volatile ("lw zero, (%0)" :: "r"(paddr)); + random = lfsr63(random); + } +} + +void vm_boot(uintptr_t test_addr) +{ + uint64_t random = ENTROPY; + if (read_csr(mhartid) > 0) + coherence_torture(); + + _Static_assert(SIZEOF_TRAPFRAME_T == sizeof(trapframe_t), "???"); + +#if (MAX_TEST_PAGES > PTES_PER_PT) || (DRAM_BASE % MEGAPAGE_SIZE) != 0 +# error +#endif + // map user to lowermost megapage + l1pt[0] = ((pte_t)user_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + // map kernel to uppermost megapage +#if SATP_MODE_CHOICE == SATP_MODE_SV48 + l1pt[PTES_PER_PT-1] = ((pte_t)kernel_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l2pt[PTES_PER_PT-1] = ((pte_t)kernel_l3pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l3pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; + user_l2pt[0] = ((pte_t)user_l3pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + user_l3pt[0] = ((pte_t)user_llpt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; +#elif SATP_MODE_CHOICE == SATP_MODE_SV39 + l1pt[PTES_PER_PT-1] = ((pte_t)kernel_l2pt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; + kernel_l2pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; + user_l2pt[0] = ((pte_t)user_llpt >> PGSHIFT << PTE_PPN_SHIFT) | PTE_V; +#elif 
SATP_MODE_CHOICE == SATP_MODE_SV32 + l1pt[PTES_PER_PT-1] = (DRAM_BASE/RISCV_PGSIZE << PTE_PPN_SHIFT) | PTE_V | PTE_R | PTE_W | PTE_X | PTE_A | PTE_D; +#else +# error +#endif + uintptr_t vm_choice = SATP_MODE_CHOICE; + uintptr_t satp_value = ((uintptr_t)l1pt >> PGSHIFT) + | (vm_choice * (SATP_MODE & ~(SATP_MODE<<1))); + write_csr(satp, satp_value); + if (read_csr(satp) != satp_value) + assert(!"unsupported satp mode"); + + // Set up PMPs if present, ignoring illegal instruction trap if not. + uintptr_t pmpc = PMP_NAPOT | PMP_R | PMP_W | PMP_X; + uintptr_t pmpa = ((uintptr_t)1 << (__riscv_xlen == 32 ? 31 : 53)) - 1; + asm volatile ("la t0, 1f\n\t" + "csrrw t0, mtvec, t0\n\t" + "csrw pmpaddr0, %1\n\t" + "csrw pmpcfg0, %0\n\t" + ".align 2\n\t" + "1: csrw mtvec, t0" + : : "r" (pmpc), "r" (pmpa) : "t0"); + + // set up supervisor trap handling + write_csr(stvec, pa2kva(trap_entry)); + write_csr(sscratch, pa2kva(read_csr(mscratch))); + write_csr(medeleg, + (1 << CAUSE_USER_ECALL) | + (1 << CAUSE_FETCH_PAGE_FAULT) | + (1 << CAUSE_LOAD_PAGE_FAULT) | + (1 << CAUSE_STORE_PAGE_FAULT)); + // FPU on; accelerator on; vector unit on + write_csr(mstatus, MSTATUS_FS | MSTATUS_XS | MSTATUS_VS); + write_csr(mie, 0); + + random = 1 + (random % MAX_TEST_PAGES); + freelist_head = pa2kva((void*)&freelist_nodes[0]); + freelist_tail = pa2kva(&freelist_nodes[MAX_TEST_PAGES-1]); + for (long i = 0; i < MAX_TEST_PAGES; i++) + { + freelist_nodes[i].addr = DRAM_BASE + (MAX_TEST_PAGES + random)*PGSIZE; + freelist_nodes[i].next = pa2kva(&freelist_nodes[i+1]); + random = LFSR_NEXT(random); + } + freelist_nodes[MAX_TEST_PAGES-1].next = 0; + + trapframe_t tf; + memset(&tf, 0, sizeof(tf)); + tf.epc = test_addr - DRAM_BASE; + pop_tf(&tf); +} From e946cc0082eb01e89ac45eb7d10e614d279e843b Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Wed, 24 Jul 2024 12:33:06 +0800 Subject: [PATCH 108/140] [rocketemu] add quit functionality to terminate simulation --- rocketemu/dpi/dpi_pre_link.cc | 11 ++++++++++- 
rocketemu/dpi/dpi_pre_link.h | 2 ++ rocketemu/driver/src/dpi.rs | 8 ++++++++ rocketemu/driver/src/sim.rs | 9 +++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/rocketemu/dpi/dpi_pre_link.cc b/rocketemu/dpi/dpi_pre_link.cc index be7293350..e35b4dccd 100644 --- a/rocketemu/dpi/dpi_pre_link.cc +++ b/rocketemu/dpi/dpi_pre_link.cc @@ -8,6 +8,12 @@ class VTestBench; VerilatedContext *contextp; VTestBench *topp; +bool quit; + +void quit_c() { + quit = true; +} + int verilator_main_c(int argc, char **argv) { // Setup context, defaults, and parse command line Verilated::debug(0); @@ -15,11 +21,14 @@ int verilator_main_c(int argc, char **argv) { contextp->fatalOnError(false); contextp->commandArgs(argc, argv); + // Set quit flag, true means quit + quit = false; + // Construct the Verilated model, from Vtop.h generated from Verilating topp = new VTestBench(contextp); // Simulate until $finish - while (!contextp->gotFinish()) { + while (!contextp->gotFinish() && !quit) { // Evaluate model topp->eval(); // Advance time diff --git a/rocketemu/dpi/dpi_pre_link.h b/rocketemu/dpi/dpi_pre_link.h index cf2752e97..94d8bd51a 100644 --- a/rocketemu/dpi/dpi_pre_link.h +++ b/rocketemu/dpi/dpi_pre_link.h @@ -13,6 +13,8 @@ extern "C" { int verilator_main_c(int argc, char **argv); +void quit_c(); + #ifdef VM_TRACE void dump_wave_c(char *path); #endif diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 65633f63d..945b545be 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -178,6 +178,8 @@ unsafe extern "C" fn cosim_watchdog_rs(target: *mut (), reason: *mut c_char) { extern "C" { fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int; + fn quit_c(); + #[cfg(feature = "trace")] fn dump_wave_c(path: *const c_char); @@ -190,6 +192,12 @@ pub(crate) fn get_t() -> u64 { unsafe { get_t_c() / 20 } } +pub(crate) fn quit() { + unsafe { + quit_c(); + } +} + pub(crate) fn verilator_main() { let mut c_args_ptr: Vec<*mut 
c_char> = std::env::args() .collect::>() diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index cea85af2c..bd5b2f3c5 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -1,6 +1,7 @@ #[cfg(feature = "trace")] use crate::dpi::dump_wave; use crate::dpi::get_t; +use crate::dpi::quit; use clap::{arg, Parser}; use std::collections::HashMap; @@ -23,6 +24,8 @@ pub(crate) struct AxiReadPayload { pub(crate) data: Vec, } +const EXIT_POS: u32 = 0x4000_0000; + #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] pub struct SimulationArgs { @@ -265,6 +268,12 @@ impl Simulator { get_t() ); + if addr == EXIT_POS { + info!("exit with code: {:x?}", data); + quit(); + return; + } + self.write_mem(addr, self.dlen / 8, strobe, data); } From 935318e0d8ecfc8d9c2f991bde0fc9c9ad65e1dc Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Wed, 24 Jul 2024 14:49:04 +0800 Subject: [PATCH 109/140] [rocketemu] exit with right EXIT_CODE check --- rocketemu/driver/src/dpi.rs | 12 +++++++----- rocketemu/driver/src/sim.rs | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 945b545be..8103ebb1c 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -22,17 +22,18 @@ pub type SvBitVecVal = u32; unsafe fn load_from_payload( payload: &*const SvBitVecVal, aw_size: c_longlong, - data_width: u32, + data_width: usize, + dlen: usize, ) -> (Vec, &[u8]) { let src = *payload as *mut u8; - let data_width_in_byte = (data_width / 8) as usize; - let strb_width_in_byte = data_width_in_byte.div_ceil(8); // ceil divide by 8 to get byte width + let data_width_in_byte = dlen / 8; + let strb_width_in_byte = dlen / data_width; let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // data width in byte let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); let strobe = &byte_vec[0..strb_width_in_byte]; let 
data = &byte_vec[strb_width_in_byte..]; - let strb_width_in_bit = std::cmp::min(8, data_width_in_byte); + let strb_width_in_bit = data_width / 8; let masks: Vec = strobe .into_iter() .flat_map(|strb| { @@ -97,7 +98,8 @@ unsafe extern "C" fn axi_write_loadStoreAXI_rs( ); let sim = &mut *(target as *mut Simulator); - let (strobe, data) = load_from_payload(&payload, 1 << awsize, sim.dlen); + let data_width = 32; // TODO: get from sim + let (strobe, data) = load_from_payload(&payload, 1 << awsize, data_width, sim.dlen as usize); sim.axi_write(awaddr as u32, &strobe, data); } diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index bd5b2f3c5..2c9eb45f5 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -25,6 +25,7 @@ pub(crate) struct AxiReadPayload { } const EXIT_POS: u32 = 0x4000_0000; +const EXIT_CODE: u32 = 0xdead_beef; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -263,15 +264,15 @@ impl Simulator { get_t() ); let data_hex = hex::encode(data); - info!( - "[{}] axi_write (addr={addr:#x}, data={data_hex})", - get_t() - ); - - if addr == EXIT_POS { - info!("exit with code: {:x?}", data); - quit(); - return; + info!("[{}] axi_write (addr={addr:#x}, data={data_hex})", get_t()); + + if addr == EXIT_POS && data.len() >= 4 { + let exit_code = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + if exit_code == EXIT_CODE { + info!("exit with code: {:x?}", exit_code); + quit(); + return; + } } self.write_mem(addr, self.dlen / 8, strobe, data); From 4d9efece5befe5b2089108cb3bbd719eb64c2184 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Fri, 26 Jul 2024 00:48:12 +0800 Subject: [PATCH 110/140] [rocketemu] add reg write event probe --- rocketemu/src/TestBench.scala | 5 +++++ rocketv/src/RocketCore.scala | 16 ++++++++++++++++ rocketv/src/RocketTile.scala | 6 ++++++ 3 files changed, 27 insertions(+) diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index 
c139bf4d8..ebf8008d1 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -6,6 +6,7 @@ package org.chipsalliance.t1.rocketv import chisel3._ import chisel3.experimental.{ExtModule, SerializableModuleGenerator} import chisel3.experimental.dataview.DataViewable +import chisel3.probe.{Probe, define} import chisel3.util.{log2Ceil, HasExtModuleInline, PopCount, UIntToOH, Valid} import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall import org.chipsalliance.amba.axi4.bundle._ @@ -71,6 +72,10 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar // FIXME: get resetVector from simulator instead of hard code here dut.io.resetVector := (BigInt(1) << 31).U + // output probes + val rocketProbe = probe.read(dut.io.rocketProbe) + when(rocketProbe.rfWen)(printf(cf"""{"event":"RegWrite","addr":${rocketProbe.rfWaddr},"data":${rocketProbe.rfWdata},"cycle":${simulationTime}}\n""")) + // Memory Drivers val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] val instFetchAgent = Module( diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala index 2efb762e3..911df4dbf 100644 --- a/rocketv/src/RocketCore.scala +++ b/rocketv/src/RocketCore.scala @@ -7,12 +7,20 @@ package org.chipsalliance.rocketv import chisel3._ import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.probe.{Probe, ProbeValue, define} import chisel3.util.circt.ClockGate import chisel3.util.experimental.decode.DecodeBundle import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up} import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes import org.chipsalliance.rvdecoderdb.Instruction +class RocketProbe(param: RocketParameter) extends Bundle { + // reg file + val rfWen = Bool() + val rfWaddr = UInt(param.lgNXRegs.W) + val rfWdata = 
UInt(param.xLen.W) +} + object RocketParameter { implicit def rwP: upickle.default.ReadWriter[RocketParameter] = upickle.default.macroRW[RocketParameter] } @@ -317,6 +325,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle { val cease = Output(Bool()) val wfi = Output(Bool()) val traceStall = Input(Bool()) + val rocketProbe = Output(Probe(new RocketProbe(parameter))) } /** The [[Rocket]] is the next version of the RocketCore, @@ -1463,6 +1472,13 @@ class Rocket(val parameter: RocketParameter) val icacheBlocked = !(io.imem.resp.valid || RegNext(io.imem.resp.valid)) // todo: perfEvents here. // csr.io.counters.foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } + + // probe xrf write + val probeWire = Wire(new RocketProbe(parameter)) + define(io.rocketProbe, ProbeValue(probeWire)) + probeWire.rfWen := rfWen + probeWire.rfWaddr := rfWaddr + probeWire.rfWdata := rfWdata } def checkExceptions(x: Seq[(Bool, UInt)]) = diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala index fd369482c..2a8ce3d8c 100644 --- a/rocketv/src/RocketTile.scala +++ b/rocketv/src/RocketTile.scala @@ -5,6 +5,7 @@ package org.chipsalliance.rocketv import chisel3._ import chisel3.experimental.hierarchy.{Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} +import chisel3.probe.{Probe, define} import chisel3.util.experimental.BitSet import chisel3.util.log2Ceil import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} @@ -394,6 +395,8 @@ class RocketTileInterface(parameter: RocketTileParameter) extends Bundle { org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) val dtimAXI: Option[AXI4RWIrrevocable] = parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val rocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) } class RocketTile(val parameter: 
RocketTileParameter) @@ -474,4 +477,7 @@ class RocketTile(val parameter: RocketTileParameter) fpu.io.cp_req <> DontCare fpu.io.cp_resp <> DontCare } + + // probe + define(io.rocketProbe, rocket.io.rocketProbe) } From 41e9893c69063e1768593479478b183ab9762b3d Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Fri, 26 Jul 2024 15:35:52 +0800 Subject: [PATCH 111/140] [rocketemu] Add spike_rs, offline and test_common crates with dependencies --- rocketemu/Cargo.lock | 599 ++++++++++++++++++++++ rocketemu/Cargo.toml | 22 + rocketemu/driver/Cargo.toml | 9 +- rocketemu/offline/Cargo.toml | 18 + rocketemu/offline/src/difftest.rs | 90 ++++ rocketemu/offline/src/dut.rs | 48 ++ rocketemu/offline/src/json_events.rs | 409 +++++++++++++++ rocketemu/offline/src/main.rs | 57 ++ rocketemu/spike_rs/Cargo.toml | 10 + rocketemu/spike_rs/build.rs | 18 + rocketemu/spike_rs/src/lib.rs | 289 +++++++++++ rocketemu/spike_rs/src/spike_event.rs | 523 +++++++++++++++++++ rocketemu/spike_rs/src/util.rs | 65 +++ rocketemu/test_common/Cargo.toml | 11 + rocketemu/test_common/src/lib.rs | 64 +++ rocketemu/test_common/src/rtl_config.rs | 20 + rocketemu/test_common/src/spike_runner.rs | 145 ++++++ 17 files changed, 2393 insertions(+), 4 deletions(-) create mode 100644 rocketemu/Cargo.lock create mode 100644 rocketemu/Cargo.toml create mode 100644 rocketemu/offline/Cargo.toml create mode 100644 rocketemu/offline/src/difftest.rs create mode 100644 rocketemu/offline/src/dut.rs create mode 100644 rocketemu/offline/src/json_events.rs create mode 100644 rocketemu/offline/src/main.rs create mode 100644 rocketemu/spike_rs/Cargo.toml create mode 100644 rocketemu/spike_rs/build.rs create mode 100644 rocketemu/spike_rs/src/lib.rs create mode 100644 rocketemu/spike_rs/src/spike_event.rs create mode 100644 rocketemu/spike_rs/src/util.rs create mode 100644 rocketemu/test_common/Cargo.toml create mode 100644 rocketemu/test_common/src/lib.rs create mode 100644 rocketemu/test_common/src/rtl_config.rs create mode 100644 
rocketemu/test_common/src/spike_runner.rs diff --git a/rocketemu/Cargo.lock b/rocketemu/Cargo.lock new file mode 100644 index 000000000..a7d3952fd --- /dev/null +++ b/rocketemu/Cargo.lock @@ -0,0 +1,599 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "spike_rs", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "driver" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "elf", + "hex", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "offline" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "common", + "libloading", + "serde", + "serde_json", + "spike_rs", + "tracing", + "tracing-subscriber", + "xmas-elf", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spike_rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "libc", + "tracing", + "xmas-elf", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xmas-elf" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c49817e78342f7f30a181573d82ff55b88a35f86ccaf07fc64b3008f56d1c6" +dependencies = [ + "zero", +] + +[[package]] +name = "zero" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" diff --git a/rocketemu/Cargo.toml b/rocketemu/Cargo.toml new file mode 100644 index 000000000..cfe79c8a4 --- /dev/null +++ b/rocketemu/Cargo.toml @@ -0,0 +1,22 @@ +[workspace] +resolver = "2" +members = [ + "test_common", + "spike_rs", + "offline", + "driver", +] +exclude = [ + "spike_interfaces" +] + +[workspace.package] +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive"] } +tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/rocketemu/driver/Cargo.toml b/rocketemu/driver/Cargo.toml index 5fb6c775a..bc0e7bec4 100644 --- a/rocketemu/driver/Cargo.toml +++ b/rocketemu/driver/Cargo.toml @@ -4,11 +4,12 @@ version = "0.1.0" edition = "2021" [dependencies] -clap = { version = "4.4.18", features = ["derive"] } -tracing = { version = "0.1.40" } -tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } + elf = "0.7.4" -anyhow = "1.0.86" hex = "0.4.3" [features] diff --git 
a/rocketemu/offline/Cargo.toml b/rocketemu/offline/Cargo.toml new file mode 100644 index 000000000..1c76f647a --- /dev/null +++ b/rocketemu/offline/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "offline" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } + +libloading = "0.8.1" +xmas-elf = "0.9.1" + +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } diff --git a/rocketemu/offline/src/difftest.rs b/rocketemu/offline/src/difftest.rs new file mode 100644 index 000000000..66a0173d9 --- /dev/null +++ b/rocketemu/offline/src/difftest.rs @@ -0,0 +1,90 @@ +use common::spike_runner::SpikeRunner; +use std::path::Path; +use tracing::info; + +use common::rtl_config::RTLConfig; +use common::CommonArgs; + +use crate::dut::Dut; +use crate::json_events::*; + +pub struct Difftest { + runner: SpikeRunner, + dut: Dut, + + #[allow(dead_code)] + config: RTLConfig, +} + +impl Difftest { + pub fn new(args: CommonArgs) -> Self { + let config = RTLConfig { vlen: args.vlen, dlen: args.dlen }; + Self { + runner: SpikeRunner::new(&args, true), + dut: Dut::new(Path::new( + &args.log_file.expect("difftest must be run with a log file"), + )), + config, + } + } + + pub fn diff(&mut self) -> anyhow::Result<()> { + self.runner.check_and_clear_fence(); + + let event = self.dut.step()?; + + match event { + JsonEvents::SimulationStart { cycle } => { + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::SimulationStop { reason, cycle } => { + info!("simulation stopped at cycle {}, reason {}", cycle, reason); + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::Issue { idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_issue(&IssueEvent { idx: *idx, cycle: *cycle }) + } + JsonEvents::MemoryWrite { mask, data, lsu_idx, address, cycle } => { + 
self.runner.cycle = *cycle; + self.runner.peek_memory_write(&MemoryWriteEvent { + mask: mask.clone(), + data: data.clone(), + lsu_idx: *lsu_idx, + address: *address, + cycle: *cycle, + }) + } + JsonEvents::LsuEnq { enq, cycle } => { + self.runner.cycle = *cycle; + self.runner.update_lsu_idx(&LsuEnqEvent { enq: *enq, cycle: *cycle }) + } + JsonEvents::VrfWrite { issue_idx, vd, offset, mask, data, lane, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_vrf_write(&VrfWriteEvent { + issue_idx: *issue_idx, + vd: *vd, + offset: *offset, + mask: mask.clone(), + data: data.clone(), + lane: *lane, + cycle: *cycle, + }) + } + JsonEvents::CheckRd { data, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.check_rd(&CheckRdEvent { data: *data, issue_idx: *issue_idx, cycle: *cycle }) + } + JsonEvents::VrfScoreboardReport { count, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.vrf_scoreboard_report(&VrfScoreboardReportEvent { + count: *count, + issue_idx: *issue_idx, + cycle: *cycle, + }) + } + } + } +} diff --git a/rocketemu/offline/src/dut.rs b/rocketemu/offline/src/dut.rs new file mode 100644 index 000000000..a4cc80821 --- /dev/null +++ b/rocketemu/offline/src/dut.rs @@ -0,0 +1,48 @@ +use anyhow::Context; +use std::io::BufRead; +use std::path::Path; + +use crate::json_events::JsonEvents; + +#[derive(Debug)] +pub struct Dut { + events: Vec, + idx: u32, +} + +impl Dut { + fn read_json(path: &Path) -> anyhow::Result> { + let file = std::fs::File::open(path).unwrap(); + let reader = std::io::BufReader::new(file); + + let mut events = Vec::new(); + + for (i, line) in reader.lines().enumerate() { + let line = line.expect("line read error"); + if line.starts_with("{") { + // ignore illegal lines + let event: JsonEvents = serde_json::from_str(&line) + .with_context(|| format!("parsing {} line {}", path.display(), i + 1))?; + events.push(event); + } + } + + Ok(events) + } + + pub fn new(path: &Path) -> Self { + let events = 
Self::read_json(path).unwrap(); + let idx = 0; + Self { events, idx } + } + + pub fn step(&mut self) -> anyhow::Result<&JsonEvents> { + let event = match self.events.get(self.idx as usize) { + Some(event) => event, + None => return Err(anyhow::anyhow!("no more events")), + }; + self.idx += 1; + + Ok(event) + } +} diff --git a/rocketemu/offline/src/json_events.rs b/rocketemu/offline/src/json_events.rs new file mode 100644 index 000000000..24652f04d --- /dev/null +++ b/rocketemu/offline/src/json_events.rs @@ -0,0 +1,409 @@ +use common::spike_runner::SpikeRunner; +use num_bigint::BigUint; +use serde::{Deserialize, Deserializer}; +use spike_rs::spike_event::LSU_IDX_DEFAULT; +use tracing::{debug, info}; + +#[derive(Deserialize, Debug, PartialEq, Clone)] +pub enum Opcode { + PutFullData = 0, + PutPartialData = 1, + Get = 4, + // AccessAckData = 0, + // AccessAck = 0, +} + +fn bigint_to_vec_u8<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + Ok(bigint.to_bytes_le()) +} + +fn bigint_to_vec_bool<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + let bytes = bigint.to_bytes_le(); + let bools = bytes.iter().flat_map(|byte| (0..8).map(move |i| (byte >> i) & 1u8 == 1u8)).collect(); + + Ok(bools) +} + +fn hex_to_u32<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let value = + u32::from_str_radix(s.trim_start_matches(' '), 16).map_err(serde::de::Error::custom)?; + + Ok(value) +} + +fn mask_display(mask: &Vec) -> String { 
+ mask.into_iter().map(|&b| if b { '1' } else { '0' }).collect() +} + +#[derive(Deserialize, Debug)] +#[serde(tag = "event")] +pub(crate) enum JsonEvents { + SimulationStart { + cycle: u64, + }, + SimulationStop { + reason: u8, + cycle: u64, + }, + Issue { + idx: u8, + cycle: u64, + }, + LsuEnq { + enq: u32, + cycle: u64, + }, + VrfWrite { + issue_idx: u8, + vd: u32, + offset: u32, + #[serde(deserialize_with = "bigint_to_vec_bool", default)] + mask: Vec, + #[serde(deserialize_with = "bigint_to_vec_u8", default)] + data: Vec, + lane: u32, + cycle: u64, + }, + MemoryWrite { + #[serde(deserialize_with = "bigint_to_vec_bool", default)] + mask: Vec, + #[serde(deserialize_with = "bigint_to_vec_u8", default)] + data: Vec, + lsu_idx: u8, + #[serde(deserialize_with = "hex_to_u32", default)] + address: u32, + cycle: u64, + }, + CheckRd { + #[serde(deserialize_with = "hex_to_u32", default)] + data: u32, + issue_idx: u8, + cycle: u64, + }, + VrfScoreboardReport { + count: u32, + issue_idx: u8, + cycle: u64, + }, +} + +pub struct IssueEvent { + pub idx: u8, + pub cycle: u64, +} + +pub struct LsuEnqEvent { + pub enq: u32, + pub cycle: u64, +} + +pub struct VrfWriteEvent { + pub lane: u32, + pub vd: u32, + pub offset: u32, + pub mask: Vec, + pub data: Vec, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct MemoryWriteEvent { + pub mask: Vec, + pub data: Vec, + pub lsu_idx: u8, + pub address: u32, + pub cycle: u64, +} + +pub struct VrfScoreboardReportEvent { + pub count: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct CheckRdEvent { + pub data: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub(crate) trait JsonEventRunner { + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()>; + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()>; + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()>; + + fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()>; + + fn 
peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()>; + + fn check_and_clear_fence(&mut self); + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()>; + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()>; +} + +impl JsonEventRunner for SpikeRunner { + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()> { + self.find_v_se_to_issue(); // ensure the front of queue is a new un-issued se + let se = self.commit_queue.front_mut().unwrap(); + if se.is_vfence() { + return Ok(()); + } + + se.issue_idx = issue.idx as u8; + + info!( + "[{}] SpikePeekIssue: issue_idx={}, pc={:#x}, inst={}", + issue.cycle, issue.idx, se.pc, se.disasm + ); + + Ok(()) + } + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()> { + let enq = lsu_enq.enq; + assert!(enq > 0, "enq should be greater than 0"); + let cycle = lsu_enq.cycle; + + if let Some(se) = self + .commit_queue + .iter_mut() + .rev() + .find(|se| (se.is_vload() || se.is_vstore()) && se.lsu_idx == LSU_IDX_DEFAULT) + { + let index = enq.trailing_zeros() as u8; + se.lsu_idx = index; + info!( + "[{cycle}] UpdateLSUIdx: instr ({}) is allocated with lsu_idx: {index}", + se.describe_insn() + ); + } + Ok(()) + } + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()> { + let cycle = vrf_write.cycle; + let vlen_in_bytes = self.vlen / 8; + let lane_number = self.dlen / 32; + let record_idx_base = (vrf_write.vd * vlen_in_bytes + + (vrf_write.lane + lane_number * vrf_write.offset) * 4) as usize; + + let mut retire_issue: Option = None; + + if let Some(se) = + self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == vrf_write.issue_idx) + { + debug!( + "[{}] VrfWrite: lane={}, vd={}, idx_base={}, issue_idx={}, offset={}, mask={}, data={:x?} ({})", + vrf_write.cycle, + vrf_write.lane, + record_idx_base, + vrf_write.vd, + vrf_write.issue_idx, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, 
+ se.describe_insn() + ); + + if let Some(unretired_writes) = se.vrf_access_record.unretired_writes { + assert!( + unretired_writes > 0, + "[{}] unretired_writes should be greater than 0, issue_idx={} ({})", + vrf_write.cycle, + vrf_write.issue_idx, + se.describe_insn() + ); + if unretired_writes == 1 { + retire_issue = Some(vrf_write.issue_idx); + } + se.vrf_access_record.unretired_writes = Some(unretired_writes - 1); + } else { + se.vrf_access_record.retired_writes += 1; + } + + vrf_write.mask.iter().enumerate().filter(|(_, &mask)| mask).for_each(|(offset, _)| { + let written_byte = *vrf_write.data.get(offset).unwrap_or(&0); + + if let Some(record) = se.vrf_access_record.all_writes.get_mut(&(record_idx_base + offset)) { + assert_eq!( + record.byte, + written_byte, + "[{}] {offset}th byte incorrect ({:02x} record != {written_byte:02x} written) \ + for vrf write (lane={}, vd={}, offset={}, mask={}, data={:x?}) \ + issue_idx={} [vrf_idx={}] (disasm: {}, pc: {:#x}, bits: {:#x})", + vrf_write.cycle, + record.byte, + vrf_write.lane, + vrf_write.vd, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, + se.issue_idx, + record_idx_base + offset, + se.disasm, + se.pc, + se.inst_bits + ); + record.executed = true; + } else { + debug!( + "[{}] cannot find vrf write record, maybe not changed (lane={}, vd={}, idx={}, offset={}, mask={}, data={:x?})", + vrf_write.cycle, + vrf_write.lane, + vrf_write.vd, + record_idx_base + offset, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data + ); + } + }) + } else { + info!( + "[{cycle}] RecordRFAccess: rtl detect vrf write on lane={}, vd={} \ + with no matched se (issue_idx={}), \ + maybe from committed load insn", + vrf_write.lane, vrf_write.vd, vrf_write.issue_idx + ); + } + + if let Some(issue_idx) = retire_issue { + self.retire(cycle, issue_idx).unwrap(); + } + + Ok(()) + } + + fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()> { + let data = 
memory_write.data.to_owned(); + let mask = memory_write.mask.to_owned(); + let cycle = memory_write.cycle; + let base_addr = memory_write.address; + let lsu_idx = memory_write.lsu_idx; + + if let Some(se) = self.commit_queue.iter_mut().find(|se| se.lsu_idx == lsu_idx) { + info!("[{cycle}] MemoryWrite: address={base_addr:08x}, size={}, data={data:x?}, mask={}, pc = {:#x}, disasm = {}", data.len(), mask_display(&mask), se.pc, se.disasm); + // compare with spike event record + mask.iter().enumerate() + .filter(|(_, &mask)| mask) + .for_each(|(offset, _)| { + let byte_addr = base_addr + offset as u32; + let data_byte = *data.get(offset).unwrap_or(&0); + let mem_write = + se.mem_access_record.all_writes.get_mut(&byte_addr).unwrap_or_else(|| { + panic!("[{cycle}] cannot find mem write of byte_addr {byte_addr:08x}") + }); + let single_mem_write_val = mem_write.writes[mem_write.num_completed_writes].val; + mem_write.num_completed_writes += 1; + assert_eq!(single_mem_write_val, data_byte, "[{cycle}] expect mem write of byte {single_mem_write_val:02X}, actual byte {data_byte:02X} (byte_addr={byte_addr:08X}, pc = {:#x}, disasm = {})", se.pc, se.disasm); + }); + return Ok(()); + } + + panic!("[{cycle}] cannot find se with instruction lsu_idx={lsu_idx}") + } + + fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()> { + let count = report.count; + let issue_idx = report.issue_idx; + let cycle = report.cycle; + + let mut should_retire: Option = None; + + if let Some(se) = self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == issue_idx) { + assert!( + se.vrf_access_record.retired_writes <= count, + "[{cycle}] retired_writes({}) should be less than count({count}), issue_idx={issue_idx} ({})", + se.vrf_access_record.retired_writes, se.describe_insn() + ); + + // if instruction writes rd, it will retire in check_rd() + if count == se.vrf_access_record.retired_writes && !se.is_rd_written { + should_retire = Some(issue_idx); + } + // if all 
writes are committed, retire the se + se.vrf_access_record.unretired_writes = Some(count - se.vrf_access_record.retired_writes); + + info!( + "[{cycle}] VrfScoreboardReport: count={count}, issue_idx={issue_idx}, retired={} ({})", + se.vrf_access_record.retired_writes, + se.describe_insn() + ); + } else { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}"); + } + + if let Some(issue_idx) = should_retire { + self.retire(cycle, issue_idx).unwrap(); + } + + Ok(()) + } + + /// after update, if instructions before fence are cleared, fence is also cleared + fn check_and_clear_fence(&mut self) { + if !self.commit_queue.is_empty() { + let se = self.commit_queue.back().unwrap(); + + if se.is_vfence() && self.commit_queue.len() == 1 { + self.commit_queue.pop_back(); + } + } + } + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()> { + let data = check_rd.data; + let cycle = check_rd.cycle; + let issue_idx = check_rd.issue_idx; + + let se = + self.commit_queue.iter_mut().find(|se| se.issue_idx == issue_idx).unwrap_or_else(|| { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}") + }); + + info!("[{cycle}] CheckRd: issue_idx={issue_idx}, data={data:x?}"); + + se.check_rd(data).expect("Failed to check_rd"); + + self.retire(cycle, issue_idx).unwrap(); + + Ok(()) + } + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()> { + if let Some(idx) = self.commit_queue.iter().position(|se| se.issue_idx == issue_idx) { + if let Some(se) = self.commit_queue.remove(idx) { + info!( + "[{cycle}] Retire: retire se with issue_idx={issue_idx}, ({})", + se.describe_insn() + ); + se.check_is_ready_for_commit(cycle).unwrap(); + } else { + panic!("[{cycle}] Retire: cannot remove se with instruction issue_idx={issue_idx}") + } + } else { + panic!("[{cycle}] Retire: cannot find se with instruction issue_idx={issue_idx}") + } + Ok(()) + } +} diff --git a/rocketemu/offline/src/main.rs b/rocketemu/offline/src/main.rs new 
file mode 100644 index 000000000..0328e2cf3 --- /dev/null +++ b/rocketemu/offline/src/main.rs @@ -0,0 +1,57 @@ +mod difftest; +mod dut; +mod json_events; + +use clap::Parser; +use tracing::info; + +use common::spike_runner::SpikeRunner; +use common::CommonArgs; + +use crate::difftest::Difftest; + +fn run_spike(args: &CommonArgs) -> anyhow::Result<()> { + let mut count: u64 = 0; + + let spike = SpikeRunner::new(args, true); + loop { + count += 1; + if count % 1000000 == 0 { + info!("count = {}", count); + } + match spike.exec() { + Ok(_) => {} + Err(_) => { + info!("total v instrucions count = {}", count); + info!("Simulation quit graceful"); + return Ok(()); + } + }; + } +} + +fn main() -> anyhow::Result<()> { + // parse args + let args = CommonArgs::parse(); + + args.setup_logger()?; + + // if there is no log file, just run spike and quit + if args.log_file.is_none() { + run_spike(&args)?; + return Ok(()); + } + + // if there is a log file, run difftest + let mut diff = Difftest::new(args); + + loop { + match diff.diff() { + Ok(_) => {} + Err(e) => { + info!("Simulation quit/error with {}", e); + return Ok(()); + } + } + } +} diff --git a/rocketemu/spike_rs/Cargo.toml b/rocketemu/spike_rs/Cargo.toml new file mode 100644 index 000000000..411d44f72 --- /dev/null +++ b/rocketemu/spike_rs/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "spike_rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +tracing = { workspace = true } +libc = "0.2.155" +xmas-elf = "0.9.1" diff --git a/rocketemu/spike_rs/build.rs b/rocketemu/spike_rs/build.rs new file mode 100644 index 000000000..9399fdaf0 --- /dev/null +++ b/rocketemu/spike_rs/build.rs @@ -0,0 +1,18 @@ +use std::env; + +fn main() { + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_LIB_DIR").expect("SPIKE_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=riscv"); + println!("cargo::rustc-link-lib=static=softfloat"); + 
println!("cargo::rustc-link-lib=static=disasm"); + println!("cargo::rustc-link-lib=static=fesvr"); + println!("cargo::rustc-link-lib=static=fdt"); + + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_INTERFACES_LIB_DIR").expect("SPIKE_INTERFACES_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=spike_interfaces"); + + println!("cargo::rerun-if-env-changed=SPIKE_LIB_DIR"); + println!("cargo::rerun-if-env-changed=SPIKE_INTERFACES_LIB_DIR"); + + println!("cargo::rustc-link-lib=stdc++"); +} diff --git a/rocketemu/spike_rs/src/lib.rs b/rocketemu/spike_rs/src/lib.rs new file mode 100644 index 000000000..3744d4a72 --- /dev/null +++ b/rocketemu/spike_rs/src/lib.rs @@ -0,0 +1,289 @@ +pub mod spike_event; +pub mod util; + +use libc::c_char; +use std::ffi::{CStr, CString}; +use tracing::trace; + +pub fn clip(binary: u32, a: i32, b: i32) -> u32 { + assert!(a <= b, "a should be less than or equal to b"); + let nbits = b - a + 1; + let mask = if nbits >= 32 { + u32::MAX + } else { + (1 << nbits) - 1 + }; + (binary >> a) & mask +} + +pub struct Spike { + spike: *mut (), + pub mem: Vec, + pub size: usize, +} + +unsafe impl Send for Spike {} + +extern "C" fn default_addr_to_mem(target: *mut (), addr: u64) -> *mut u8 { + let spike = target as *mut Spike; + let addr = addr as usize; + unsafe { + let spike: &mut Spike = &mut *spike; + let ptr = spike.mem.as_mut_ptr().offset(addr as isize); + ptr + } +} + +type FfiCallback = extern "C" fn(*mut (), u64) -> *mut u8; + +impl Spike { + // we need to have a boxed SpikeCObject, since its pointer will be passed to C to perform FFI call + pub fn new(arch: &str, set: &str, lvl: &str, lane_number: usize, mem_size: usize) -> Box { + let arch = CString::new(arch).unwrap(); + let set = CString::new(set).unwrap(); + let lvl = CString::new(lvl).unwrap(); + let spike = unsafe { spike_new(arch.as_ptr(), set.as_ptr(), lvl.as_ptr(), lane_number) }; + let mut self_: Box = Box::new(Spike { spike, mem: vec![0; mem_size], size: 
mem_size }); + + // TODO: support customized ffi + let ffi_target: *mut Spike = &mut *self_; + unsafe { + spike_register_callback(ffi_target as *mut (), default_addr_to_mem); + } + + self_ + } + + pub fn get_proc(&self) -> Processor { + let processor = unsafe { spike_get_proc(self.spike) }; + Processor { processor } + } + + pub fn load_bytes_to_mem( + &mut self, + addr: usize, + len: usize, + bytes: Vec, + ) -> anyhow::Result<()> { + trace!("ld: addr: 0x{:x}, len: 0x{:x}", addr, len); + assert!(addr + len <= self.size); + + let dst = &mut self.mem[addr..addr + len]; + for (i, byte) in bytes.iter().enumerate() { + dst[i] = *byte; + } + + Ok(()) + } + + pub fn mem_byte_on_addr(&self, addr: usize) -> anyhow::Result { + Ok(self.mem[addr]) + } +} + +impl Drop for Spike { + fn drop(&mut self) { + unsafe { spike_destruct(self.spike) } + } +} + +pub struct Processor { + processor: *mut (), +} + +impl Processor { + pub fn disassemble(&self) -> String { + let bytes = unsafe { proc_disassemble(self.processor) }; + let c_str = unsafe { CStr::from_ptr(bytes as *mut c_char) }; + format!("{}", c_str.to_string_lossy()) + } + + pub fn reset(&self) { + unsafe { proc_reset(self.processor) } + } + + pub fn get_state(&self) -> State { + let state = unsafe { proc_get_state(self.processor) }; + State { state } + } + + pub fn func(&self) -> u64 { + unsafe { proc_func(self.processor) } + } + + pub fn get_insn(&self) -> u32 { + unsafe { proc_get_insn(self.processor) as u32 } + } + + pub fn get_vreg_data(&self, idx: u32, offset: u32) -> u8 { + unsafe { proc_get_vreg_data(self.processor, idx, offset) } + } + + pub fn get_rs1(&self) -> u32 { + unsafe { proc_get_rs1(self.processor) } + } + + pub fn get_rs2(&self) -> u32 { + unsafe { proc_get_rs2(self.processor) } + } + + pub fn get_rd(&self) -> u32 { + unsafe { proc_get_rd(self.processor) } + } + + // vu + pub fn vu_get_vtype(&self) -> u32 { + unsafe { proc_vu_get_vtype(self.processor) as u32 } + } + + pub fn vu_get_vxrm(&self) -> u32 { + 
unsafe { proc_vu_get_vxrm(self.processor) } + } + + pub fn vu_get_vnf(&self) -> u32 { + unsafe { proc_vu_get_vnf(self.processor) } + } + + pub fn vu_get_vill(&self) -> bool { + unsafe { proc_vu_get_vill(self.processor) } + } + + pub fn vu_get_vxsat(&self) -> bool { + unsafe { proc_vu_get_vxsat(self.processor) } + } + + pub fn vu_get_vl(&self) -> u32 { + unsafe { proc_vu_get_vl(self.processor) } + } + + pub fn vu_get_vstart(&self) -> u16 { + unsafe { proc_vu_get_vstart(self.processor) } + } +} + +impl Drop for Processor { + fn drop(&mut self) { + unsafe { proc_destruct(self.processor) } + } +} + +pub struct State { + state: *mut (), +} + +impl State { + pub fn set_pc(&self, pc: u64) { + unsafe { state_set_pc(self.state, pc) } + } + + pub fn get_pc(&self) -> u64 { + unsafe { state_get_pc(self.state) } + } + + pub fn handle_pc(&self, pc: u64) -> anyhow::Result<()> { + match unsafe { state_handle_pc(self.state, pc) } { + 0 => Ok(()), + _ => Err(anyhow::anyhow!("Error handling pc")), + } + } + + pub fn get_reg(&self, idx: u32, is_fp: bool) -> u32 { + unsafe { state_get_reg(self.state, idx, is_fp) } + } + + pub fn get_reg_write_size(&self) -> u32 { + unsafe { state_get_reg_write_size(self.state) } + } + + pub fn get_reg_write_index(&self, index: u32) -> u32 { + unsafe { state_get_reg_write_index(self.state, index) } + } + + pub fn get_mem_write_size(&self) -> u32 { + unsafe { state_get_mem_write_size(self.state) } + } + + pub fn get_mem_write(&self, index: u32) -> (u32, u64, u8) { + let addr = unsafe { state_get_mem_write_addr(self.state, index) }; + let value = unsafe { state_get_mem_write_value(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_write_size_by_byte(self.state, index) }; + (addr, value, size_by_byte) + } + + pub fn get_mem_read_size(&self) -> u32 { + unsafe { state_get_mem_read_size(self.state) } + } + + pub fn get_mem_read(&self, index: u32) -> (u32, u8) { + let addr = unsafe { state_get_mem_read_addr(self.state, index) }; + let 
size_by_byte = unsafe { state_get_mem_read_size_by_byte(self.state, index) }; + (addr, size_by_byte) + } + + pub fn set_mcycle(&self, mcycle: usize) { + unsafe { state_set_mcycle(self.state, mcycle) } + } + + pub fn clear(&self) { + unsafe { state_clear(self.state) } + } + + pub fn exit(&self) -> u64 { + unsafe { state_exit(self.state) } + } +} + +impl Drop for State { + fn drop(&mut self) { + unsafe { state_destruct(self.state) } + } +} + +#[link(name = "spike_interfaces")] +extern "C" { + pub fn spike_register_callback(target: *mut (), callback: FfiCallback); + fn spike_new( + arch: *const c_char, + set: *const c_char, + lvl: *const c_char, + lane_number: usize, + ) -> *mut (); + fn spike_get_proc(spike: *mut ()) -> *mut (); + fn spike_destruct(spike: *mut ()); + fn proc_disassemble(proc: *mut ()) -> *mut c_char; + fn proc_reset(proc: *mut ()); + fn proc_get_state(proc: *mut ()) -> *mut (); + fn proc_func(proc: *mut ()) -> u64; + fn proc_get_insn(proc: *mut ()) -> u64; + fn proc_get_vreg_data(proc: *mut (), vreg_idx: u32, vreg_offset: u32) -> u8; + fn proc_get_rs1(proc: *mut ()) -> u32; + fn proc_get_rs2(proc: *mut ()) -> u32; + fn proc_get_rd(proc: *mut ()) -> u32; + + fn proc_vu_get_vtype(proc: *mut ()) -> u64; + fn proc_vu_get_vxrm(proc: *mut ()) -> u32; + fn proc_vu_get_vnf(proc: *mut ()) -> u32; + fn proc_vu_get_vill(proc: *mut ()) -> bool; + fn proc_vu_get_vxsat(proc: *mut ()) -> bool; + fn proc_vu_get_vl(proc: *mut ()) -> u32; + fn proc_vu_get_vstart(proc: *mut ()) -> u16; + + fn proc_destruct(proc: *mut ()); + fn state_set_pc(state: *mut (), pc: u64); + fn state_get_pc(state: *mut ()) -> u64; + fn state_get_reg(state: *mut (), index: u32, is_fp: bool) -> u32; + fn state_get_reg_write_size(state: *mut ()) -> u32; + fn state_get_reg_write_index(state: *mut (), index: u32) -> u32; + fn state_get_mem_write_size(state: *mut ()) -> u32; + fn state_get_mem_write_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_write_value(state: *mut (), index: u32) 
-> u64; + fn state_get_mem_write_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_get_mem_read_size(state: *mut ()) -> u32; + fn state_get_mem_read_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_read_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_handle_pc(state: *mut (), pc: u64) -> u64; + fn state_set_mcycle(state: *mut (), mcycle: usize); + fn state_clear(state: *mut ()); + fn state_destruct(state: *mut ()); + fn state_exit(state: *mut ()) -> u64; +} diff --git a/rocketemu/spike_rs/src/spike_event.rs b/rocketemu/spike_rs/src/spike_event.rs new file mode 100644 index 000000000..611f7156b --- /dev/null +++ b/rocketemu/spike_rs/src/spike_event.rs @@ -0,0 +1,523 @@ +use std::collections::HashMap; +use tracing::trace; +use Default; + +use crate::clip; +use crate::Spike; + +#[derive(Debug, Clone)] +pub struct SingleMemWrite { + pub val: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Debug, Clone)] +pub struct SingleMemRead { + pub val: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Debug, Clone)] +pub struct MemWriteRecord { + pub writes: Vec, + pub num_completed_writes: usize, +} + +#[derive(Debug, Clone)] +pub struct MemReadRecord { + pub reads: Vec, + pub num_completed_reads: usize, +} + +#[derive(Debug, Clone)] +pub struct SingleVrfWrite { + pub byte: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Default, Debug, Clone)] +pub struct VdWriteRecord { + vd_bytes: Vec, +} + +#[derive(Default, Debug, Clone)] +pub struct MemAccessRecord { + pub all_writes: HashMap, + pub all_reads: HashMap, +} + +#[derive(Default, Debug, Clone)] +pub struct VrfAccessRecord { + pub all_writes: HashMap, + pub unretired_writes: Option, + pub retired_writes: u32, +} + +pub const LSU_IDX_DEFAULT: u8 = 0xff; +pub const ISSUE_IDX_DEFAULT: u8 = 0xff; + +#[derive(Default, Debug, Clone)] +pub struct SpikeEvent { + pub do_log_vrf: bool, + 
+ // index + pub lsu_idx: u8, + pub issue_idx: u8, + + // instruction + pub disasm: String, + pub pc: u64, + pub inst_bits: u32, + + // scalar to vector interface(used for driver) + pub rs1: u32, + pub rs2: u32, + pub rs1_bits: u32, + pub rs2_bits: u32, + pub rd_idx: u32, + + // vtype + pub vtype: u32, + pub vxrm: u32, + pub vnf: u32, + + // other CSR + pub vill: bool, + pub vxsat: bool, + pub vl: u32, + pub vstart: u16, + + // rd + pub rd_bits: u32, + + // mutable states + pub is_rd_written: bool, + pub vd_write_record: VdWriteRecord, + pub mem_access_record: MemAccessRecord, + pub vrf_access_record: VrfAccessRecord, +} + +impl SpikeEvent { + pub fn new(spike: &Spike, do_log_vrf: bool) -> Self { + let proc = spike.get_proc(); + let state = proc.get_state(); + let inst_bits = proc.get_insn(); + + let opcode = clip(inst_bits, 0, 6); + let width = clip(inst_bits, 12, 14); + + let is_rs_fp = opcode == 0b1010111 && width == 0b101/* OPFVF */; + // early return vsetvl scalar instruction + + // rs1, rs2 + let (rs1, rs2) = (proc.get_rs1(), proc.get_rs2()); + + SpikeEvent { + do_log_vrf, + + lsu_idx: LSU_IDX_DEFAULT, + issue_idx: ISSUE_IDX_DEFAULT, + + disasm: spike.get_proc().disassemble(), + pc: proc.get_state().get_pc(), + inst_bits, + + rs1, + rs2, + rs1_bits: state.get_reg(rs1, is_rs_fp), + rs2_bits: state.get_reg(rs2, is_rs_fp), + rd_idx: proc.get_rd(), + + vtype: proc.vu_get_vtype(), + vxrm: proc.vu_get_vxrm(), + vnf: proc.vu_get_vnf(), + + vill: proc.vu_get_vill(), + vxsat: proc.vu_get_vxsat(), + vl: proc.vu_get_vl(), + vstart: proc.vu_get_vstart(), + + rd_bits: Default::default(), + + is_rd_written: false, + vd_write_record: Default::default(), + mem_access_record: Default::default(), + vrf_access_record: Default::default(), + } + } + + pub fn opcode(&self) -> u32 { + clip(self.inst_bits, 0, 6) + } + + pub fn width(&self) -> u32 { + clip(self.inst_bits, 12, 14) + } + + pub fn rs1(&self) -> u32 { + clip(self.inst_bits, 15, 19) + } + + pub fn csr(&self) -> u32 { + 
clip(self.inst_bits, 20, 31) + } + + pub fn funct6(&self) -> u32 { + clip(self.inst_bits, 26, 31) + } + + pub fn mop(&self) -> u32 { + clip(self.inst_bits, 26, 27) + } + + pub fn lumop(&self) -> u32 { + clip(self.inst_bits, 20, 24) + } + + pub fn vm(&self) -> bool { + clip(self.inst_bits, 25, 25) != 0 + } + + // check whether the instruction is a vector load + pub fn is_vload(&self) -> bool { + self.opcode() == 0b0000111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + // check whether the instruction is a vector store + pub fn is_vstore(&self) -> bool { + self.opcode() == 0b0100111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + pub fn is_v(&self) -> bool { + (self.opcode() == 0b1010111 || self.is_vload() || self.is_vstore()) && !self.is_vsetvl() + } + + pub fn is_vsetvl(&self) -> bool { + self.opcode() == 0b1010111 && self.width() == 0b111 + } + + pub fn is_scalar(&self) -> bool { + !self.is_v() + } + + // check whether the instruction is a scalar load + pub fn is_load(&self) -> bool { + self.opcode() == 0b0000011 || self.is_cl() + } + + // check whether the instruction is a scalar store + pub fn is_store(&self) -> bool { + self.opcode() == 0b0100011 || self.is_cw() + } + + pub fn is_whole(&self) -> bool { + self.mop() == 0 && self.lumop() == 8 + } + + pub fn is_widening(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_mask_vd(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_exit(&self) -> bool { + let is_csr_type = self.opcode() == 0b1110011 && ((self.width() & 0b011) != 0); + let is_csr_write = is_csr_type && (((self.width() & 0b100) | self.rs1()) != 0); + + is_csr_write && self.csr() == 0x7cc + } + + pub fn is_vfence(&self) -> bool { + self.is_exit() // only exit instruction is treated as fence now + } + + pub fn is_rd_fp(&self) -> bool { + (self.opcode() == 0b1010111) + && (self.rs1 == 0) + && (self.funct6() == 0b010000) + && self.vm() + && (self.width() == 
0b001) + } + + pub fn c_op(&self) -> u32 { + clip(self.inst_bits, 0, 1) + } + + pub fn c_func3(&self) -> u32 { + clip(self.inst_bits, 13, 15) + } + + pub fn is_cl(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 == 0 ) || /* c.lw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 == 0 ) /* c.lwsp */ + } + + pub fn is_cw(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 != 0 ) || /* c.sw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 != 0 ) /* c.swsp */ + } + + pub fn vlmul(&self) -> u32 { + clip(self.vtype, 0, 2) + } + + pub fn vma(&self) -> bool { + clip(self.vtype, 7, 7) != 0 + } + + pub fn vta(&self) -> bool { + clip(self.vtype, 6, 6) != 0 + } + + pub fn vsew(&self) -> u32 { + clip(self.vtype, 3, 5) + } + + pub fn vcsr(&self) -> u32 { + self.vxsat as u32 | self.vxrm << 1 + } + + pub fn describe_insn(&self) -> String { + format!( + "pc={:#x}, disasm='{}', bits={:#x}", + self.pc, self.disasm, self.inst_bits + ) + } + + pub fn get_vrf_write_range(&self, vlen_in_bytes: u32) -> anyhow::Result<(u32, u32)> { + if self.is_vstore() { + return Ok((0, 0)); + } + + if self.is_vload() { + let vd_bytes_start = self.rd_idx * vlen_in_bytes; + if self.is_whole() { + return Ok((vd_bytes_start, vlen_in_bytes * (1 + self.vnf))); + } + let len = if self.vlmul() & 0b100 != 0 { + vlen_in_bytes * (1 + self.vnf) + } else { + (vlen_in_bytes * (1 + self.vnf)) << self.vlmul() + }; + return Ok((vd_bytes_start, len)); + } + + let vd_bytes_start = self.rd_idx * vlen_in_bytes; + + if self.is_mask_vd() { + return Ok((vd_bytes_start, vlen_in_bytes)); + } + + let len = if self.vlmul() & 0b100 != 0 { + vlen_in_bytes >> (8 - self.vlmul()) + } else { + vlen_in_bytes << self.vlmul() + }; + + Ok(( + vd_bytes_start, + if self.is_widening() { len * 2 } else { len }, + )) + } + + pub fn pre_log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + if self.do_log_vrf { + self.rd_bits = spike.get_proc().get_rd(); + + // record the vrf writes 
before executing the insn + let vlen_in_bytes = vlen; + + let proc = spike.get_proc(); + let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); + self.vd_write_record.vd_bytes.resize(len as usize, 0u8); + for i in 0..len { + let offset = start + i; + let vreg_index = offset / vlen_in_bytes; + let vreg_offset = offset % vlen_in_bytes; + let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); + self.vd_write_record.vd_bytes[i as usize] = cur_byte; + } + } + + Ok(()) + } + + pub fn log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + if self.do_log_vrf { + self.log_vrf_write(spike, vlen).unwrap(); + self.log_reg_write(spike).unwrap(); + } + self.log_mem_write(spike).unwrap(); + self.log_mem_read(spike).unwrap(); + + Ok(()) + } + + fn log_vrf_write(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + let proc = spike.get_proc(); + // record vrf writes + // note that we do not need log_reg_write to find records, we just decode the + // insn and compare bytes + let vlen_in_bytes = vlen / 8; + let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); + trace!("vrf write range: start: {start}, len: {len}"); + for i in 0..len { + let offset = start + i; + let origin_byte = self.vd_write_record.vd_bytes[i as usize]; + let vreg_index = offset / vlen_in_bytes; + let vreg_offset = offset % vlen_in_bytes; + let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); + if origin_byte != cur_byte { + self + .vrf_access_record + .all_writes + .entry(offset as usize) + .or_insert(SingleVrfWrite { byte: cur_byte, executed: false }); + trace!( + "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, change_to={cur_byte}, vrf_idx={offset}", + vec![offset / vlen_in_bytes, offset % vlen_in_bytes], + ); + } else { + trace!( + "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, not changed, vrf_idx={offset}", + vec![offset / vlen_in_bytes, offset % vlen_in_bytes], + ); + } + } + Ok(()) + } + + fn log_reg_write(&mut self, 
spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + // in spike, log_reg_write is arrange: + // xx0000 <- x + // xx0001 <- f + // xx0010 <- vreg + // xx0011 <- vec + // xx0100 <- csr + let reg_write_size = state.get_reg_write_size(); + // TODO: refactor it. + (0..reg_write_size).for_each(|idx| match state.get_reg_write_index(idx) & 0xf { + 0b0000 => { + // scalar rf + let data = state.get_reg(self.rd_idx, false); + self.is_rd_written = true; + if data != self.rd_bits { + trace!( + "ScalarRFChange: idx={}, change_from={}, change_to={data}", + self.rd_idx, + self.rd_bits + ); + self.rd_bits = data; + } + } + 0b0001 => { + let data = state.get_reg(self.rd_idx, true); + self.is_rd_written = true; + if data != self.rd_bits { + trace!( + "FloatRFChange: idx={}, change_from={}, change_to={data}", + self.rd_idx, + self.rd_bits + ); + self.rd_bits = data; + } + } + _ => trace!( + "UnknownRegChange, idx={}, spike detect unknown reg change", + state.get_reg_write_index(idx) + ), + }); + + Ok(()) + } + + pub fn log_mem_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + + let mem_write_size = state.get_mem_write_size(); + (0..mem_write_size).for_each(|i| { + let (addr, value, size) = state.get_mem_write(i); + (0..size).for_each(|offset| { + self + .mem_access_record + .all_writes + .entry(addr + offset as u32) + .or_insert(MemWriteRecord { writes: vec![], num_completed_writes: 0 }) + .writes + .push(SingleMemWrite { + val: (value >> (offset * 8)) as u8, + executed: false, + }); + }); + trace!("SpikeMemWrite: addr={addr:x}, value={value:x}, size={size}"); + }); + + Ok(()) + } + + fn log_mem_read(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + + let mem_read_size = state.get_mem_read_size(); + (0..mem_read_size).for_each(|i| { + let (addr, size) = state.get_mem_read(i); + let mut value = 0; + 
(0..size).for_each(|offset| { + let byte = spike.mem_byte_on_addr(addr as usize + offset as usize).unwrap(); + value |= (byte as u64) << (offset * 8); + // record the read + self + .mem_access_record + .all_reads + .entry(addr + offset as u32) + .or_insert(MemReadRecord { reads: vec![], num_completed_reads: 0 }) + .reads + .push(SingleMemRead { val: byte, executed: false }); + }); + trace!("SpikeMemRead: addr={addr:08x}, value={value:08x}, size={size}"); + }); + + Ok(()) + } + + pub fn check_rd(&self, data: u32) -> anyhow::Result<()> { + // TODO: rtl should indicate whether resp_bits_data is valid + if self.is_rd_written { + assert_eq!( + data, self.rd_bits, + "expect to write rd[{}] = {}, actual {}", + self.rd_idx, self.rd_bits, data + ); + } + + Ok(()) + } + + pub fn check_is_ready_for_commit(&self, cycle: u64) -> anyhow::Result<()> { + for (addr, record) in &self.mem_access_record.all_writes { + assert_eq!( + record.num_completed_writes, + record.writes.len(), + "[{cycle}] expect to write mem {addr:#x}, not executed when commit, issue_idx={} ({})", + self.issue_idx, + self.describe_insn(), + ); + } + for (idx, record) in &self.vrf_access_record.all_writes { + assert!( + record.executed, + "[{cycle}] expect to write vrf {idx}, not executed when commit, issue_idx={} ({})", + self.issue_idx, + self.describe_insn() + ); + } + + Ok(()) + } +} diff --git a/rocketemu/spike_rs/src/util.rs b/rocketemu/spike_rs/src/util.rs new file mode 100644 index 000000000..6ded0eec5 --- /dev/null +++ b/rocketemu/spike_rs/src/util.rs @@ -0,0 +1,65 @@ +use crate::Spike; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use xmas_elf::program::{ProgramHeader, Type}; +use xmas_elf::{header, ElfFile}; + +pub fn load_elf(spike: &mut Spike, fname: &Path) -> anyhow::Result { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + 
assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + spike.load_bytes_to_mem(addr, size, slice.to_vec()).unwrap(); + } + } + } + + Ok(header.pt2.entry_point()) +} + +// todo: unify load_elf and load_elf_to_buffer +pub fn load_elf_to_buffer(mem: &mut [u8], fname: &Path) -> anyhow::Result { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + + let dst: &mut _ = &mut mem[addr..addr + size]; + for (i, byte) in slice.iter().enumerate() { + dst[i] = *byte; + } + } + } + } + + Ok(header.pt2.entry_point()) +} diff --git a/rocketemu/test_common/Cargo.toml b/rocketemu/test_common/Cargo.toml new file mode 100644 index 000000000..d5b3f32aa --- /dev/null +++ b/rocketemu/test_common/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "common" +version.workspace = true +edition = "2021" + +[dependencies] +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } diff --git a/rocketemu/test_common/src/lib.rs b/rocketemu/test_common/src/lib.rs new file mode 100644 index 
000000000..ae582b77d --- /dev/null +++ b/rocketemu/test_common/src/lib.rs @@ -0,0 +1,64 @@ +use anyhow::Result; +use clap::Parser; +use spike_rs::Spike; +use std::path::PathBuf; +use tracing::Level; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; + +pub mod rtl_config; +pub mod spike_runner; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct CommonArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, + + /// vlen config + #[arg(long, default_value = option_env!("DESIGN_VLEN").unwrap_or("0"))] + pub vlen: u32, + + /// dlen config + #[arg(long, default_value = option_env!("DESIGN_DLEN").unwrap_or("0"))] + pub dlen: u32, + + /// ISA config + #[arg(long, default_value = "rv32gcv")] + pub set: String, +} + +pub static MEM_SIZE: usize = 1usize << 32; + +impl CommonArgs { + pub fn to_spike_c_handler(&self) -> Box { + let arch = &format!("vlen:{},elen:32", self.vlen); + let lvl = "M"; + + Spike::new(arch, &self.set, lvl, (self.dlen / 32) as usize, MEM_SIZE) + } + + pub fn setup_logger(&self) -> Result<()> { + // setup log + let log_level: Level = self.log_level.parse()?; + let global_logger = FmtSubscriber::builder() + .with_env_filter(EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + Ok(()) + } +} diff --git a/rocketemu/test_common/src/rtl_config.rs b/rocketemu/test_common/src/rtl_config.rs new file mode 100644 index 000000000..0daf72624 --- /dev/null +++ b/rocketemu/test_common/src/rtl_config.rs @@ -0,0 +1,20 @@ +pub struct RTLConfig { + pub vlen: u32, + pub dlen: u32, +} + +// TODO: read from json + +impl 
RTLConfig { + pub fn xlen(&self) -> u32 { + 32 // TODO: configurable + } + + pub fn vlen_in_bytes(&self) -> u32 { + self.vlen / 8 + } + + pub fn lane_num(&self) -> u32 { + self.dlen / self.xlen() + } +} diff --git a/rocketemu/test_common/src/spike_runner.rs b/rocketemu/test_common/src/spike_runner.rs new file mode 100644 index 000000000..2d69d4642 --- /dev/null +++ b/rocketemu/test_common/src/spike_runner.rs @@ -0,0 +1,145 @@ +use std::collections::VecDeque; +use std::path::Path; +use tracing::debug; + +use spike_rs::spike_event::SpikeEvent; +use spike_rs::util::load_elf; +use spike_rs::Spike; + +use crate::CommonArgs; + +pub struct SpikeRunner { + spike: Box, + + /// commit queue + /// in the spike thread, spike should detech if this queue is full, if not + /// full, execute until a vector instruction, record the behavior of this + /// instruction, and send to commit queue. + /// Note: + /// - The event issued earliest is at the back of the queue + /// - The queue may contain at most one unissued event. 
If so, the unissued event must be at the + /// front of the queue, and it must be a fence + pub commit_queue: VecDeque, + + /// config for v extension + pub vlen: u32, + pub dlen: u32, + + /// implement the get_t() for mcycle csr update + pub cycle: u64, + + /// for mcycle csr update + pub spike_cycle: u64, + + pub do_log_vrf: bool, +} + +impl SpikeRunner { + pub fn new(args: &CommonArgs, do_log_vrf: bool) -> Self { + // load the elf file + // initialize spike + let mut spike = args.to_spike_c_handler(); + + let entry_addr = load_elf(&mut spike, Path::new(&args.elf_file)).unwrap(); + + // initialize processor + let proc = spike.get_proc(); + let state = proc.get_state(); + proc.reset(); + state.set_pc(entry_addr); + + SpikeRunner { + spike, + commit_queue: VecDeque::new(), + vlen: args.vlen, + dlen: args.dlen, + cycle: 0, + spike_cycle: 0, + do_log_vrf, + } + } + + pub fn load_elf(&mut self, fname: &Path) -> anyhow::Result { + load_elf(&mut *self.spike, fname) + } + + // just execute one instruction for non-difftest + pub fn exec(&self) -> anyhow::Result<()> { + let spike = &self.spike; + let proc = spike.get_proc(); + let state = proc.get_state(); + + let new_pc = proc.func(); + + state.handle_pc(new_pc).unwrap(); + + let ret = state.exit(); + + if ret == 0 { + return Err(anyhow::anyhow!("simulation finished!")); + } + + Ok(()) + } + + // execute the spike processor for one instruction and record + // the spike event for difftest + pub fn spike_step(&mut self) -> SpikeEvent { + let spike = &self.spike; + let proc = self.spike.get_proc(); + let state = proc.get_state(); + + state.set_mcycle((self.cycle + self.spike_cycle) as usize); + + let pc = state.get_pc(); + let disasm = proc.disassemble(); + let insn_bits = proc.get_insn(); + + let mut event = SpikeEvent::new(spike, self.do_log_vrf); + state.clear(); + + let new_pc = if event.is_v() || event.is_exit() { + // inst is v / quit + debug!( + "SpikeStep: spike run vector insn ({}), is_vfence={}", + 
event.describe_insn(), + event.is_vfence(), + ); + event.pre_log_arch_changes(spike, self.vlen).unwrap(); + let new_pc_ = proc.func(); + event.log_arch_changes(spike, self.vlen).unwrap(); + new_pc_ + } else { + // inst is scalar + debug!( + "SpikeStep: spike run scalar insn (pc={:#x}, disasm={}, bits={:#x})", + pc, disasm, insn_bits, + ); + let new_pc_ = proc.func(); + event.log_mem_write(spike).unwrap(); + new_pc_ + }; + + state.handle_pc(new_pc).unwrap(); + + self.spike_cycle += 1; + + event + } + + pub fn find_v_se_to_issue(&mut self) -> SpikeEvent { + if !self.commit_queue.is_empty() && self.commit_queue.front().unwrap().is_vfence() { + // if the front (latest) se is a vfence, return the vfence + self.commit_queue.front().unwrap().clone() + } else { + // else, loop until find a se, and push the se to the front + loop { + let se = self.spike_step(); + if se.is_v() { + self.commit_queue.push_front(se.clone()); + break se.clone(); + } + } + } + } +} From e5cbf47c102a23a3518da07e453b9f9f21ddac4f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Fri, 26 Jul 2024 19:19:22 +0800 Subject: [PATCH 112/140] [nix] add difftest derivation for rocketv Signed-off-by: Avimitin --- nix/t1/default.nix | 2 +- rocketemu/Cargo.lock | 35 +++++++++++++++++++++++ rocketemu/Cargo.toml | 1 + rocketemu/default.nix | 55 ++++++++++++++++++++++++++++++++++-- rocketemu/driver/build.rs | 2 +- rocketemu/driver/src/sim.rs | 33 ---------------------- rocketemu/offline/Cargo.toml | 1 + 7 files changed, 92 insertions(+), 37 deletions(-) diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 2c6b4ade5..12c6e68db 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -39,7 +39,7 @@ lib.makeScope newScope rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; rocketv-rtl = self.callPackage ./rocketv-rtl.nix { }; rocketv-verilated-csrc = self.callPackage ./rocketv-verilated-csrc.nix { }; - rocketv-emu = self.callPackage ../../rocketemu { }; + rocketv = self.callPackage ../../rocketemu { }; 
omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/rocketemu/Cargo.lock b/rocketemu/Cargo.lock index a7d3952fd..4f4c7afe7 100644 --- a/rocketemu/Cargo.lock +++ b/rocketemu/Cargo.lock @@ -66,6 +66,12 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + [[package]] name = "cfg-if" version = "1.0.0" @@ -224,6 +230,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "offline" version = "0.1.0" @@ -232,6 +266,7 @@ dependencies = [ "clap", "common", "libloading", + "num-bigint", "serde", "serde_json", "spike_rs", diff --git a/rocketemu/Cargo.toml b/rocketemu/Cargo.toml index cfe79c8a4..b34153b2a 100644 --- a/rocketemu/Cargo.toml +++ b/rocketemu/Cargo.toml @@ -20,3 +20,4 @@ tracing = "0.1.40" tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +num-bigint = "0.4.6" diff --git 
a/rocketemu/default.nix b/rocketemu/default.nix index 1c0a81dde..cb5c34a1e 100644 --- a/rocketemu/default.nix +++ b/rocketemu/default.nix @@ -1,7 +1,58 @@ { lib , newScope +, rustPlatform +, libspike +, zlib +, rocketv-verilated-csrc }: -lib.makeScope newScope (scope: { +lib.makeScope newScope (scope: rec { c-dpi-lib = scope.callPackage ./dpi { }; - driver = scope.callPackage ./driver { }; + + # FIXME: merge with difftest and put it under the nix/pkgs + spike_interfaces = scope.callPackage ../difftest/spike_interfaces { }; + + emu = rustPlatform.buildRustPackage { + name = "rocketemu"; + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./driver + ./offline + ./spike_rs + ./test_common + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + zlib + spike_interfaces + ]; + + # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? + env = + let + toLib = drv: "${drv}/lib"; + in + { + ROCKET_DPI_DIR = toLib c-dpi-lib; + TESTBENCH_LIB_DIR = toLib rocketv-verilated-csrc; + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib spike_interfaces; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + ln -s $out/bin/offline $driver/bin/offline + ''; + }; }) diff --git a/rocketemu/driver/build.rs b/rocketemu/driver/build.rs index 7b1e05015..748eeea21 100644 --- a/rocketemu/driver/build.rs +++ b/rocketemu/driver/build.rs @@ -3,7 +3,7 @@ fn main() { SEARCH_DIRS.iter().for_each(|env| { let dir = std::env::var(env).unwrap_or_else(|_| panic!("ERROR: {} environment variable not set", &env)); - println!("cargo:rustc-link-search=native={}/lib", &dir); + println!("cargo:rustc-link-search=native={}", &dir); println!("cargo:rerun-if-env-changed={}", env); }); diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index 2c9eb45f5..fb8596940 
100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -327,36 +327,3 @@ impl Simulator { dump_wave(&self.wave_path); } } - -#[cfg(test)] -mod test { - use super::*; - use std::process::Command; - - #[test] - fn test_load_elf() { - let output = Command::new("nix") - .args([ - "build", - "--no-warn-dirty", - "--print-out-paths", - "--no-link", - ".#riscv-tests", - ]) - .output() - .expect("fail to get riscv-test path"); - if !output.status.success() { - panic!("fail to build riscv-test"); - } - - let test_path = String::from_utf8_lossy(&output.stdout).to_string(); - - Simulator::load_elf(Path::new(&test_path)).unwrap(); - } - - #[test] - fn x86_should_fail() { - let err = Simulator::load_elf(Path::new("/bin/cp")).unwrap_err(); - assert_eq!(format!("{}", err), "ELF is not in RISC-V") - } -} diff --git a/rocketemu/offline/Cargo.toml b/rocketemu/offline/Cargo.toml index 1c76f647a..2824a161e 100644 --- a/rocketemu/offline/Cargo.toml +++ b/rocketemu/offline/Cargo.toml @@ -10,6 +10,7 @@ tracing-subscriber = { workspace = true } anyhow = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +num-bigint = { workspace = true } libloading = "0.8.1" xmas-elf = "0.9.1" From 6b689fbe39b95c9bc73f9f67224562da1affb5c7 Mon Sep 17 00:00:00 2001 From: Porterlu <1258210724@qq.com> Date: Thu, 25 Jul 2024 00:35:18 +0800 Subject: [PATCH 113/140] [rocketemu] set the resetvector(width: 64) using information from an ELF file --- rocketemu/dpi/dpi.cc | 5 +++++ rocketemu/dpi/dpi.h | 3 +++ rocketemu/driver/result | 2 +- rocketemu/driver/src/dpi.rs | 8 ++++++++ rocketemu/driver/src/sim.rs | 6 ++++-- rocketemu/src/TestBench.scala | 4 ++-- rocketv/src/RocketTile.scala | 2 +- 7 files changed, 24 insertions(+), 6 deletions(-) diff --git a/rocketemu/dpi/dpi.cc b/rocketemu/dpi/dpi.cc index fcf59050f..c1f6e403b 100644 --- a/rocketemu/dpi/dpi.cc +++ b/rocketemu/dpi/dpi.cc @@ -50,6 +50,11 @@ void axi_read_instructionFetchAXI( /// true. 
void cosim_init() { dpi_call_target = cosim_init_rs(); } +/// dynamically set resetvector according to the payload +void get_resetvector(long long *resetvector) { + get_resetvector_rs(dpi_call_target, resetvector); +} + /// evaluate at every 1024 cycles, return reason = 0 to continue simulation, /// other value is used as error code. void cosim_watchdog(char *reason) { diff --git a/rocketemu/dpi/dpi.h b/rocketemu/dpi/dpi.h index 46f6224e9..6f38b1639 100644 --- a/rocketemu/dpi/dpi.h +++ b/rocketemu/dpi/dpi.h @@ -43,6 +43,9 @@ extern void axi_read_instructionFetchAXI_rs( /// true. returns dpi call target extern void *cosim_init_rs(); +/// evaluate after reset, return the reset vector +extern void *get_resetvector_rs(void *dpi_call_target, long long *resetvector); + /// evaluate at every 1024 cycles, return reason = 0 to continue simulation, /// other value is used as error code. extern void cosim_watchdog_rs(void *dpi_call_target, char *reason); diff --git a/rocketemu/driver/result b/rocketemu/driver/result index e86331456..9b374c6d3 120000 --- a/rocketemu/driver/result +++ b/rocketemu/driver/result @@ -1 +1 @@ -/nix/store/vp6gwp37wwsal5wgpqydnqqchkrb102h-riscv-tests-riscv64-none-elf-7878085d2546af0eb7af72a1df00996d5d8c43fb \ No newline at end of file +/nix/store/2yl3kijw09n499mipba5irh61q1s18sb-riscv-tests-riscv32-none-elf-7878085d2546af0eb7af72a1df00996d5d8c43fb \ No newline at end of file diff --git a/rocketemu/driver/src/dpi.rs b/rocketemu/driver/src/dpi.rs index 8103ebb1c..48ec129ac 100644 --- a/rocketemu/driver/src/dpi.rs +++ b/rocketemu/driver/src/dpi.rs @@ -163,6 +163,14 @@ unsafe extern "C" fn cosim_init_rs(call_init: *mut SvBit) -> *mut () { Box::into_raw(sim) as *mut () } +#[no_mangle] +unsafe extern "C" fn get_resetvector_rs(target: *mut (), resetvector: *mut c_longlong) { + if !target.is_null() { + let sim = &mut *(target as *mut Simulator); + *resetvector = sim.e_entry as c_longlong + } +} + #[no_mangle] unsafe extern "C" fn cosim_watchdog_rs(target: 
*mut (), reason: *mut c_char) { // watchdog dpi call would be called before initialization, guard on null target diff --git a/rocketemu/driver/src/sim.rs b/rocketemu/driver/src/sim.rs index fb8596940..0420a7f8c 100644 --- a/rocketemu/driver/src/sim.rs +++ b/rocketemu/driver/src/sim.rs @@ -112,6 +112,7 @@ pub struct Simulator { pub(crate) fn_sym_tab: FunctionSymTab, pub(crate) dlen: u32, pub(crate) timeout: u64, + pub(crate) e_entry: u64, #[cfg(feature = "trace")] wave_path: String, @@ -140,8 +141,8 @@ impl Simulator { tracing::subscriber::set_global_default(global_logger) .expect("internal error: fail to setup log subscriber"); - // FIXME: pass e_entry to rocket - let (_FIXME_e_entry, mem, fn_sym_tab) = + // pass e_entry to rocket + let (e_entry, mem, fn_sym_tab) = Self::load_elf(&args.elf_file).expect("fail creating simulator"); #[cfg(feature = "trace")] @@ -154,6 +155,7 @@ impl Simulator { dlen: option_env!("DESIGN_DLEN") .map(|dlen| dlen.parse().expect("fail to parse dlen into u32 digit")) .unwrap_or(256), + e_entry: e_entry, #[cfg(feature = "trace")] wave_path: args.wave_path.to_owned(), diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index ebf8008d1..63d93a80d 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -69,8 +69,8 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar dut.io.msip := 0.U dut.io.buserror := 0.U - // FIXME: get resetVector from simulator instead of hard code here - dut.io.resetVector := (BigInt(1) << 31).U + // get resetVector from simulator + dut.io.resetVector := RawUnclockedNonVoidFunctionCall("get_resetvector", Const(UInt(64.W)))(simulationTime === 0.U) // output probes val rocketProbe = probe.read(dut.io.rocketProbe) diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala index 2a8ce3d8c..33c2082d7 100644 --- a/rocketv/src/RocketTile.scala +++ b/rocketv/src/RocketTile.scala @@ -368,7 +368,7 @@ case class RocketTileParameter( class 
RocketTileInterface(parameter: RocketTileParameter) extends Bundle { val clock = Input(Clock()) val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) - // todo: Const + val hartid = Flipped(UInt(parameter.hartIdLen.W)) val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) From f199c04507f9fe9f08f823d7727f4b44048a7447 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Sat, 27 Jul 2024 23:52:52 +0800 Subject: [PATCH 114/140] [rocketemu] add rustfmt --- rocketemu/.rustfmt.toml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 rocketemu/.rustfmt.toml diff --git a/rocketemu/.rustfmt.toml b/rocketemu/.rustfmt.toml new file mode 100644 index 000000000..7b6c82e24 --- /dev/null +++ b/rocketemu/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 \ No newline at end of file From 53e21a2bd09e548d9bc1a5a87d8df320cab909d4 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Sat, 27 Jul 2024 23:54:33 +0800 Subject: [PATCH 115/140] [rocketemu] build rocket offline difftest [rocketemu] catch watchdog timeout event in offline difftest [rocketemu] optimize difftest loop [rocketemu] should not soft link the nix result directly [rocketemu] spike event record reg write idx with hex [rocketemu] add support for msu priviledge [rocketemu] skip check when spike/rtl reg write idx == 0 --- rocketemu/driver/result | 1 - rocketemu/offline/src/difftest.rs | 76 ++-- rocketemu/offline/src/json_events.rs | 404 +--------------------- rocketemu/spike_rs/src/spike_event.rs | 211 ++--------- rocketemu/src/TestBench.scala | 2 +- rocketemu/test_common/src/lib.rs | 2 +- rocketemu/test_common/src/spike_runner.rs | 58 +--- 7 files changed, 64 insertions(+), 690 deletions(-) delete mode 120000 rocketemu/driver/result diff --git a/rocketemu/driver/result b/rocketemu/driver/result deleted file mode 120000 index 9b374c6d3..000000000 --- a/rocketemu/driver/result +++ /dev/null @@ -1 +0,0 @@ 
-/nix/store/2yl3kijw09n499mipba5irh61q1s18sb-riscv-tests-riscv32-none-elf-7878085d2546af0eb7af72a1df00996d5d8c43fb \ No newline at end of file diff --git a/rocketemu/offline/src/difftest.rs b/rocketemu/offline/src/difftest.rs index 66a0173d9..573fbb756 100644 --- a/rocketemu/offline/src/difftest.rs +++ b/rocketemu/offline/src/difftest.rs @@ -1,6 +1,5 @@ use common::spike_runner::SpikeRunner; use std::path::Path; -use tracing::info; use common::rtl_config::RTLConfig; use common::CommonArgs; @@ -29,61 +28,26 @@ impl Difftest { } pub fn diff(&mut self) -> anyhow::Result<()> { - self.runner.check_and_clear_fence(); - - let event = self.dut.step()?; - - match event { - JsonEvents::SimulationStart { cycle } => { - self.runner.cycle = *cycle; - Ok(()) - } - JsonEvents::SimulationStop { reason, cycle } => { - info!("simulation stopped at cycle {}, reason {}", cycle, reason); - self.runner.cycle = *cycle; - Ok(()) - } - JsonEvents::Issue { idx, cycle } => { - self.runner.cycle = *cycle; - self.runner.peek_issue(&IssueEvent { idx: *idx, cycle: *cycle }) - } - JsonEvents::MemoryWrite { mask, data, lsu_idx, address, cycle } => { - self.runner.cycle = *cycle; - self.runner.peek_memory_write(&MemoryWriteEvent { - mask: mask.clone(), - data: data.clone(), - lsu_idx: *lsu_idx, - address: *address, - cycle: *cycle, - }) - } - JsonEvents::LsuEnq { enq, cycle } => { - self.runner.cycle = *cycle; - self.runner.update_lsu_idx(&LsuEnqEvent { enq: *enq, cycle: *cycle }) - } - JsonEvents::VrfWrite { issue_idx, vd, offset, mask, data, lane, cycle } => { - self.runner.cycle = *cycle; - self.runner.peek_vrf_write(&VrfWriteEvent { - issue_idx: *issue_idx, - vd: *vd, - offset: *offset, - mask: mask.clone(), - data: data.clone(), - lane: *lane, - cycle: *cycle, - }) - } - JsonEvents::CheckRd { data, issue_idx, cycle } => { - self.runner.cycle = *cycle; - self.runner.check_rd(&CheckRdEvent { data: *data, issue_idx: *issue_idx, cycle: *cycle }) - } - JsonEvents::VrfScoreboardReport { count, 
issue_idx, cycle } => { - self.runner.cycle = *cycle; - self.runner.vrf_scoreboard_report(&VrfScoreboardReportEvent { - count: *count, - issue_idx: *issue_idx, - cycle: *cycle, - }) + loop { + let se = self.runner.spike_step(); + if se.is_exit() { + return Err(anyhow::anyhow!("exit detected")); + } + if se.is_rd_written() && se.rd_idx != 0 { + let event = self.dut.step()?; + + match event { + JsonEvents::RegWrite { addr, data, cycle } => { + self.runner.cycle = *cycle; + self.runner.check_reg_write( + &RegWriteEvent { addr: *addr, data: *data, cycle: *cycle }, + &se, + )? + } + JsonEvents::SimulationStop { reason, cycle } => { + return Err(anyhow::anyhow!("[{}] simulation stop: {}", *cycle, *reason)); + } + } } } } diff --git a/rocketemu/offline/src/json_events.rs b/rocketemu/offline/src/json_events.rs index 24652f04d..ed61c23da 100644 --- a/rocketemu/offline/src/json_events.rs +++ b/rocketemu/offline/src/json_events.rs @@ -1,409 +1,39 @@ use common::spike_runner::SpikeRunner; -use num_bigint::BigUint; -use serde::{Deserialize, Deserializer}; -use spike_rs::spike_event::LSU_IDX_DEFAULT; -use tracing::{debug, info}; - -#[derive(Deserialize, Debug, PartialEq, Clone)] -pub enum Opcode { - PutFullData = 0, - PutPartialData = 1, - Get = 4, - // AccessAckData = 0, - // AccessAck = 0, -} - -fn bigint_to_vec_u8<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let s: &str = Deserialize::deserialize(deserializer)?; - let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) - .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; - Ok(bigint.to_bytes_le()) -} - -fn bigint_to_vec_bool<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let s: &str = Deserialize::deserialize(deserializer)?; - let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) - .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; - let bytes = bigint.to_bytes_le(); 
- let bools = bytes.iter().flat_map(|byte| (0..8).map(move |i| (byte >> i) & 1u8 == 1u8)).collect(); - - Ok(bools) -} - -fn hex_to_u32<'de, D>(deserializer: D) -> Result -where - D: Deserializer<'de>, -{ - let s: &str = Deserialize::deserialize(deserializer)?; - let value = - u32::from_str_radix(s.trim_start_matches(' '), 16).map_err(serde::de::Error::custom)?; - - Ok(value) -} - -fn mask_display(mask: &Vec) -> String { - mask.into_iter().map(|&b| if b { '1' } else { '0' }).collect() -} +use serde::Deserialize; +use spike_rs::spike_event::SpikeEvent; +use tracing::info; #[derive(Deserialize, Debug)] #[serde(tag = "event")] pub(crate) enum JsonEvents { - SimulationStart { - cycle: u64, - }, - SimulationStop { - reason: u8, - cycle: u64, - }, - Issue { - idx: u8, - cycle: u64, - }, - LsuEnq { - enq: u32, - cycle: u64, - }, - VrfWrite { - issue_idx: u8, - vd: u32, - offset: u32, - #[serde(deserialize_with = "bigint_to_vec_bool", default)] - mask: Vec, - #[serde(deserialize_with = "bigint_to_vec_u8", default)] - data: Vec, - lane: u32, - cycle: u64, - }, - MemoryWrite { - #[serde(deserialize_with = "bigint_to_vec_bool", default)] - mask: Vec, - #[serde(deserialize_with = "bigint_to_vec_u8", default)] - data: Vec, - lsu_idx: u8, - #[serde(deserialize_with = "hex_to_u32", default)] - address: u32, - cycle: u64, - }, - CheckRd { - #[serde(deserialize_with = "hex_to_u32", default)] - data: u32, - issue_idx: u8, - cycle: u64, - }, - VrfScoreboardReport { - count: u32, - issue_idx: u8, - cycle: u64, - }, -} - -pub struct IssueEvent { - pub idx: u8, - pub cycle: u64, -} - -pub struct LsuEnqEvent { - pub enq: u32, - pub cycle: u64, -} - -pub struct VrfWriteEvent { - pub lane: u32, - pub vd: u32, - pub offset: u32, - pub mask: Vec, - pub data: Vec, - pub issue_idx: u8, - pub cycle: u64, -} - -pub struct MemoryWriteEvent { - pub mask: Vec, - pub data: Vec, - pub lsu_idx: u8, - pub address: u32, - pub cycle: u64, + RegWrite { addr: u32, data: u32, cycle: u64 }, + SimulationStop { 
reason: u8, cycle: u64 }, } -pub struct VrfScoreboardReportEvent { - pub count: u32, - pub issue_idx: u8, - pub cycle: u64, -} - -pub struct CheckRdEvent { +pub struct RegWriteEvent { + pub addr: u32, pub data: u32, - pub issue_idx: u8, pub cycle: u64, } pub(crate) trait JsonEventRunner { - fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()>; - - fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()>; - - fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()>; - - fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()>; - - fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()>; - - fn check_and_clear_fence(&mut self); - - fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()>; - - fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()>; + fn check_reg_write(&mut self, reg_write: &RegWriteEvent, se: &SpikeEvent) -> anyhow::Result<()>; } impl JsonEventRunner for SpikeRunner { - fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()> { - self.find_v_se_to_issue(); // ensure the front of queue is a new un-issued se - let se = self.commit_queue.front_mut().unwrap(); - if se.is_vfence() { - return Ok(()); - } - - se.issue_idx = issue.idx as u8; + fn check_reg_write(&mut self, reg_write: &RegWriteEvent, se: &SpikeEvent) -> anyhow::Result<()> { + let addr = reg_write.addr; + let data = reg_write.data; + let cycle = reg_write.cycle; + info!("[{cycle}] RegWrite: idx={addr:02x}, data={data:08x}",); info!( - "[{}] SpikePeekIssue: issue_idx={}, pc={:#x}, inst={}", - issue.cycle, issue.idx, se.pc, se.disasm + "[{cycle}] SpikeEvent: idx={:02x}, data={:08x}", + se.rd_idx, se.rd_bits ); + assert_eq!(addr, se.rd_idx, "addr should be equal to se.rd_idx"); + assert_eq!(data, se.rd_bits, "data should be equal to se.rd_bits"); Ok(()) } - - fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()> { - let 
enq = lsu_enq.enq; - assert!(enq > 0, "enq should be greater than 0"); - let cycle = lsu_enq.cycle; - - if let Some(se) = self - .commit_queue - .iter_mut() - .rev() - .find(|se| (se.is_vload() || se.is_vstore()) && se.lsu_idx == LSU_IDX_DEFAULT) - { - let index = enq.trailing_zeros() as u8; - se.lsu_idx = index; - info!( - "[{cycle}] UpdateLSUIdx: instr ({}) is allocated with lsu_idx: {index}", - se.describe_insn() - ); - } - Ok(()) - } - - fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()> { - let cycle = vrf_write.cycle; - let vlen_in_bytes = self.vlen / 8; - let lane_number = self.dlen / 32; - let record_idx_base = (vrf_write.vd * vlen_in_bytes - + (vrf_write.lane + lane_number * vrf_write.offset) * 4) as usize; - - let mut retire_issue: Option = None; - - if let Some(se) = - self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == vrf_write.issue_idx) - { - debug!( - "[{}] VrfWrite: lane={}, vd={}, idx_base={}, issue_idx={}, offset={}, mask={}, data={:x?} ({})", - vrf_write.cycle, - vrf_write.lane, - record_idx_base, - vrf_write.vd, - vrf_write.issue_idx, - vrf_write.offset, - mask_display(&vrf_write.mask), - vrf_write.data, - se.describe_insn() - ); - - if let Some(unretired_writes) = se.vrf_access_record.unretired_writes { - assert!( - unretired_writes > 0, - "[{}] unretired_writes should be greater than 0, issue_idx={} ({})", - vrf_write.cycle, - vrf_write.issue_idx, - se.describe_insn() - ); - if unretired_writes == 1 { - retire_issue = Some(vrf_write.issue_idx); - } - se.vrf_access_record.unretired_writes = Some(unretired_writes - 1); - } else { - se.vrf_access_record.retired_writes += 1; - } - - vrf_write.mask.iter().enumerate().filter(|(_, &mask)| mask).for_each(|(offset, _)| { - let written_byte = *vrf_write.data.get(offset).unwrap_or(&0); - - if let Some(record) = se.vrf_access_record.all_writes.get_mut(&(record_idx_base + offset)) { - assert_eq!( - record.byte, - written_byte, - "[{}] {offset}th byte incorrect ({:02x} 
record != {written_byte:02x} written) \ - for vrf write (lane={}, vd={}, offset={}, mask={}, data={:x?}) \ - issue_idx={} [vrf_idx={}] (disasm: {}, pc: {:#x}, bits: {:#x})", - vrf_write.cycle, - record.byte, - vrf_write.lane, - vrf_write.vd, - vrf_write.offset, - mask_display(&vrf_write.mask), - vrf_write.data, - se.issue_idx, - record_idx_base + offset, - se.disasm, - se.pc, - se.inst_bits - ); - record.executed = true; - } else { - debug!( - "[{}] cannot find vrf write record, maybe not changed (lane={}, vd={}, idx={}, offset={}, mask={}, data={:x?})", - vrf_write.cycle, - vrf_write.lane, - vrf_write.vd, - record_idx_base + offset, - vrf_write.offset, - mask_display(&vrf_write.mask), - vrf_write.data - ); - } - }) - } else { - info!( - "[{cycle}] RecordRFAccess: rtl detect vrf write on lane={}, vd={} \ - with no matched se (issue_idx={}), \ - maybe from committed load insn", - vrf_write.lane, vrf_write.vd, vrf_write.issue_idx - ); - } - - if let Some(issue_idx) = retire_issue { - self.retire(cycle, issue_idx).unwrap(); - } - - Ok(()) - } - - fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()> { - let data = memory_write.data.to_owned(); - let mask = memory_write.mask.to_owned(); - let cycle = memory_write.cycle; - let base_addr = memory_write.address; - let lsu_idx = memory_write.lsu_idx; - - if let Some(se) = self.commit_queue.iter_mut().find(|se| se.lsu_idx == lsu_idx) { - info!("[{cycle}] MemoryWrite: address={base_addr:08x}, size={}, data={data:x?}, mask={}, pc = {:#x}, disasm = {}", data.len(), mask_display(&mask), se.pc, se.disasm); - // compare with spike event record - mask.iter().enumerate() - .filter(|(_, &mask)| mask) - .for_each(|(offset, _)| { - let byte_addr = base_addr + offset as u32; - let data_byte = *data.get(offset).unwrap_or(&0); - let mem_write = - se.mem_access_record.all_writes.get_mut(&byte_addr).unwrap_or_else(|| { - panic!("[{cycle}] cannot find mem write of byte_addr {byte_addr:08x}") - }); - let 
single_mem_write_val = mem_write.writes[mem_write.num_completed_writes].val; - mem_write.num_completed_writes += 1; - assert_eq!(single_mem_write_val, data_byte, "[{cycle}] expect mem write of byte {single_mem_write_val:02X}, actual byte {data_byte:02X} (byte_addr={byte_addr:08X}, pc = {:#x}, disasm = {})", se.pc, se.disasm); - }); - return Ok(()); - } - - panic!("[{cycle}] cannot find se with instruction lsu_idx={lsu_idx}") - } - - fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()> { - let count = report.count; - let issue_idx = report.issue_idx; - let cycle = report.cycle; - - let mut should_retire: Option = None; - - if let Some(se) = self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == issue_idx) { - assert!( - se.vrf_access_record.retired_writes <= count, - "[{cycle}] retired_writes({}) should be less than count({count}), issue_idx={issue_idx} ({})", - se.vrf_access_record.retired_writes, se.describe_insn() - ); - - // if instruction writes rd, it will retire in check_rd() - if count == se.vrf_access_record.retired_writes && !se.is_rd_written { - should_retire = Some(issue_idx); - } - // if all writes are committed, retire the se - se.vrf_access_record.unretired_writes = Some(count - se.vrf_access_record.retired_writes); - - info!( - "[{cycle}] VrfScoreboardReport: count={count}, issue_idx={issue_idx}, retired={} ({})", - se.vrf_access_record.retired_writes, - se.describe_insn() - ); - } else { - panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}"); - } - - if let Some(issue_idx) = should_retire { - self.retire(cycle, issue_idx).unwrap(); - } - - Ok(()) - } - - /// after update, if instructions before fence are cleared, fence is also cleared - fn check_and_clear_fence(&mut self) { - if !self.commit_queue.is_empty() { - let se = self.commit_queue.back().unwrap(); - - if se.is_vfence() && self.commit_queue.len() == 1 { - self.commit_queue.pop_back(); - } - } - } - - fn check_rd(&mut self, 
check_rd: &CheckRdEvent) -> anyhow::Result<()> { - let data = check_rd.data; - let cycle = check_rd.cycle; - let issue_idx = check_rd.issue_idx; - - let se = - self.commit_queue.iter_mut().find(|se| se.issue_idx == issue_idx).unwrap_or_else(|| { - panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}") - }); - - info!("[{cycle}] CheckRd: issue_idx={issue_idx}, data={data:x?}"); - - se.check_rd(data).expect("Failed to check_rd"); - - self.retire(cycle, issue_idx).unwrap(); - - Ok(()) - } - - fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()> { - if let Some(idx) = self.commit_queue.iter().position(|se| se.issue_idx == issue_idx) { - if let Some(se) = self.commit_queue.remove(idx) { - info!( - "[{cycle}] Retire: retire se with issue_idx={issue_idx}, ({})", - se.describe_insn() - ); - se.check_is_ready_for_commit(cycle).unwrap(); - } else { - panic!("[{cycle}] Retire: cannot remove se with instruction issue_idx={issue_idx}") - } - } else { - panic!("[{cycle}] Retire: cannot find se with instruction issue_idx={issue_idx}") - } - Ok(()) - } } diff --git a/rocketemu/spike_rs/src/spike_event.rs b/rocketemu/spike_rs/src/spike_event.rs index 611f7156b..7f6a2f030 100644 --- a/rocketemu/spike_rs/src/spike_event.rs +++ b/rocketemu/spike_rs/src/spike_event.rs @@ -95,6 +95,8 @@ pub struct SpikeEvent { pub vd_write_record: VdWriteRecord, pub mem_access_record: MemAccessRecord, pub vrf_access_record: VrfAccessRecord, + + pub exit: bool, } impl SpikeEvent { @@ -143,6 +145,8 @@ impl SpikeEvent { vd_write_record: Default::default(), mem_access_record: Default::default(), vrf_access_record: Default::default(), + + exit: false, } } @@ -210,6 +214,10 @@ impl SpikeEvent { self.opcode() == 0b0100011 || self.is_cw() } + pub fn is_rd_written(&self) -> bool { + self.is_rd_written + } + pub fn is_whole(&self) -> bool { self.mop() == 0 && self.lumop() == 8 } @@ -223,14 +231,7 @@ impl SpikeEvent { } pub fn is_exit(&self) -> bool { - let is_csr_type = 
self.opcode() == 0b1110011 && ((self.width() & 0b011) != 0); - let is_csr_write = is_csr_type && (((self.width() & 0b100) | self.rs1()) != 0); - - is_csr_write && self.csr() == 0x7cc - } - - pub fn is_vfence(&self) -> bool { - self.is_exit() // only exit instruction is treated as fence now + self.exit } pub fn is_rd_fp(&self) -> bool { @@ -282,114 +283,11 @@ impl SpikeEvent { pub fn describe_insn(&self) -> String { format!( "pc={:#x}, disasm='{}', bits={:#x}", - self.pc, self.disasm, self.inst_bits + self.pc as u32, self.disasm, self.inst_bits ) } - pub fn get_vrf_write_range(&self, vlen_in_bytes: u32) -> anyhow::Result<(u32, u32)> { - if self.is_vstore() { - return Ok((0, 0)); - } - - if self.is_vload() { - let vd_bytes_start = self.rd_idx * vlen_in_bytes; - if self.is_whole() { - return Ok((vd_bytes_start, vlen_in_bytes * (1 + self.vnf))); - } - let len = if self.vlmul() & 0b100 != 0 { - vlen_in_bytes * (1 + self.vnf) - } else { - (vlen_in_bytes * (1 + self.vnf)) << self.vlmul() - }; - return Ok((vd_bytes_start, len)); - } - - let vd_bytes_start = self.rd_idx * vlen_in_bytes; - - if self.is_mask_vd() { - return Ok((vd_bytes_start, vlen_in_bytes)); - } - - let len = if self.vlmul() & 0b100 != 0 { - vlen_in_bytes >> (8 - self.vlmul()) - } else { - vlen_in_bytes << self.vlmul() - }; - - Ok(( - vd_bytes_start, - if self.is_widening() { len * 2 } else { len }, - )) - } - - pub fn pre_log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { - if self.do_log_vrf { - self.rd_bits = spike.get_proc().get_rd(); - - // record the vrf writes before executing the insn - let vlen_in_bytes = vlen; - - let proc = spike.get_proc(); - let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); - self.vd_write_record.vd_bytes.resize(len as usize, 0u8); - for i in 0..len { - let offset = start + i; - let vreg_index = offset / vlen_in_bytes; - let vreg_offset = offset % vlen_in_bytes; - let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); - 
self.vd_write_record.vd_bytes[i as usize] = cur_byte; - } - } - - Ok(()) - } - - pub fn log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { - if self.do_log_vrf { - self.log_vrf_write(spike, vlen).unwrap(); - self.log_reg_write(spike).unwrap(); - } - self.log_mem_write(spike).unwrap(); - self.log_mem_read(spike).unwrap(); - - Ok(()) - } - - fn log_vrf_write(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { - let proc = spike.get_proc(); - // record vrf writes - // note that we do not need log_reg_write to find records, we just decode the - // insn and compare bytes - let vlen_in_bytes = vlen / 8; - let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); - trace!("vrf write range: start: {start}, len: {len}"); - for i in 0..len { - let offset = start + i; - let origin_byte = self.vd_write_record.vd_bytes[i as usize]; - let vreg_index = offset / vlen_in_bytes; - let vreg_offset = offset % vlen_in_bytes; - let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); - if origin_byte != cur_byte { - self - .vrf_access_record - .all_writes - .entry(offset as usize) - .or_insert(SingleVrfWrite { byte: cur_byte, executed: false }); - trace!( - "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, change_to={cur_byte}, vrf_idx={offset}", - vec![offset / vlen_in_bytes, offset % vlen_in_bytes], - ); - } else { - trace!( - "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, not changed, vrf_idx={offset}", - vec![offset / vlen_in_bytes, offset % vlen_in_bytes], - ); - } - } - Ok(()) - } - - fn log_reg_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + pub fn log_reg_write(&mut self, spike: &Spike) -> anyhow::Result<()> { let proc = spike.get_proc(); let state = proc.get_state(); // in spike, log_reg_write is arrange: @@ -405,29 +303,17 @@ impl SpikeEvent { // scalar rf let data = state.get_reg(self.rd_idx, false); self.is_rd_written = true; - if data != self.rd_bits { - trace!( - "ScalarRFChange: idx={}, change_from={}, 
change_to={data}", - self.rd_idx, - self.rd_bits - ); - self.rd_bits = data; - } + self.rd_bits = data; + trace!("ScalarRFChange: idx={:02x}, data={:08x}", self.rd_idx, self.rd_bits); } 0b0001 => { let data = state.get_reg(self.rd_idx, true); self.is_rd_written = true; - if data != self.rd_bits { - trace!( - "FloatRFChange: idx={}, change_from={}, change_to={data}", - self.rd_idx, - self.rd_bits - ); - self.rd_bits = data; - } + self.rd_bits = data; + trace!("FloatRFChange: idx={:02x}, data={:08x}", self.rd_idx, self.rd_bits); } _ => trace!( - "UnknownRegChange, idx={}, spike detect unknown reg change", + "UnknownRegChange, idx={:02x}, spike detect unknown reg change", state.get_reg_write_index(idx) ), }); @@ -455,69 +341,12 @@ impl SpikeEvent { }); }); trace!("SpikeMemWrite: addr={addr:x}, value={value:x}, size={size}"); + if addr == 0x4000_0000 && value == 0xdead_beef && size == 4 { + self.exit = true; + return; + } }); Ok(()) } - - fn log_mem_read(&mut self, spike: &Spike) -> anyhow::Result<()> { - let proc = spike.get_proc(); - let state = proc.get_state(); - - let mem_read_size = state.get_mem_read_size(); - (0..mem_read_size).for_each(|i| { - let (addr, size) = state.get_mem_read(i); - let mut value = 0; - (0..size).for_each(|offset| { - let byte = spike.mem_byte_on_addr(addr as usize + offset as usize).unwrap(); - value |= (byte as u64) << (offset * 8); - // record the read - self - .mem_access_record - .all_reads - .entry(addr + offset as u32) - .or_insert(MemReadRecord { reads: vec![], num_completed_reads: 0 }) - .reads - .push(SingleMemRead { val: byte, executed: false }); - }); - trace!("SpikeMemRead: addr={addr:08x}, value={value:08x}, size={size}"); - }); - - Ok(()) - } - - pub fn check_rd(&self, data: u32) -> anyhow::Result<()> { - // TODO: rtl should indicate whether resp_bits_data is valid - if self.is_rd_written { - assert_eq!( - data, self.rd_bits, - "expect to write rd[{}] = {}, actual {}", - self.rd_idx, self.rd_bits, data - ); - } - - Ok(()) - 
} - - pub fn check_is_ready_for_commit(&self, cycle: u64) -> anyhow::Result<()> { - for (addr, record) in &self.mem_access_record.all_writes { - assert_eq!( - record.num_completed_writes, - record.writes.len(), - "[{cycle}] expect to write mem {addr:#x}, not executed when commit, issue_idx={} ({})", - self.issue_idx, - self.describe_insn(), - ); - } - for (idx, record) in &self.vrf_access_record.all_writes { - assert!( - record.executed, - "[{cycle}] expect to write vrf {idx}, not executed when commit, issue_idx={} ({})", - self.issue_idx, - self.describe_insn() - ); - } - - Ok(()) - } } diff --git a/rocketemu/src/TestBench.scala b/rocketemu/src/TestBench.scala index 63d93a80d..035012337 100644 --- a/rocketemu/src/TestBench.scala +++ b/rocketemu/src/TestBench.scala @@ -74,7 +74,7 @@ class TestBench(generator: SerializableModuleGenerator[RocketTile, RocketTilePar // output probes val rocketProbe = probe.read(dut.io.rocketProbe) - when(rocketProbe.rfWen)(printf(cf"""{"event":"RegWrite","addr":${rocketProbe.rfWaddr},"data":${rocketProbe.rfWdata},"cycle":${simulationTime}}\n""")) + when(rocketProbe.rfWen && rocketProbe.rfWaddr =/= 0.U)(printf(cf"""{"event":"RegWrite","addr":${rocketProbe.rfWaddr},"data":${rocketProbe.rfWdata},"cycle":${simulationTime}}\n""")) // Memory Drivers val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] diff --git a/rocketemu/test_common/src/lib.rs b/rocketemu/test_common/src/lib.rs index ae582b77d..17851612e 100644 --- a/rocketemu/test_common/src/lib.rs +++ b/rocketemu/test_common/src/lib.rs @@ -41,7 +41,7 @@ pub static MEM_SIZE: usize = 1usize << 32; impl CommonArgs { pub fn to_spike_c_handler(&self) -> Box { let arch = &format!("vlen:{},elen:32", self.vlen); - let lvl = "M"; + let lvl = "MSU"; Spike::new(arch, &self.set, lvl, (self.dlen / 32) as usize, MEM_SIZE) } diff --git a/rocketemu/test_common/src/spike_runner.rs b/rocketemu/test_common/src/spike_runner.rs index 2d69d4642..24e0a2e29 100644 --- 
a/rocketemu/test_common/src/spike_runner.rs +++ b/rocketemu/test_common/src/spike_runner.rs @@ -1,4 +1,3 @@ -use std::collections::VecDeque; use std::path::Path; use tracing::debug; @@ -11,16 +10,6 @@ use crate::CommonArgs; pub struct SpikeRunner { spike: Box, - /// commit queue - /// in the spike thread, spike should detech if this queue is full, if not - /// full, execute until a vector instruction, record the behavior of this - /// instruction, and send to commit queue. - /// Note: - /// - The event issued earliest is at the back of the queue - /// - The queue may contain at most one unissued event. If so, the unissued event must be at the - /// front of the queue, and it must be a fence - pub commit_queue: VecDeque, - /// config for v extension pub vlen: u32, pub dlen: u32, @@ -50,7 +39,6 @@ impl SpikeRunner { SpikeRunner { spike, - commit_queue: VecDeque::new(), vlen: args.vlen, dlen: args.dlen, cycle: 0, @@ -91,34 +79,14 @@ impl SpikeRunner { state.set_mcycle((self.cycle + self.spike_cycle) as usize); - let pc = state.get_pc(); - let disasm = proc.disassemble(); - let insn_bits = proc.get_insn(); - let mut event = SpikeEvent::new(spike, self.do_log_vrf); state.clear(); - let new_pc = if event.is_v() || event.is_exit() { - // inst is v / quit - debug!( - "SpikeStep: spike run vector insn ({}), is_vfence={}", - event.describe_insn(), - event.is_vfence(), - ); - event.pre_log_arch_changes(spike, self.vlen).unwrap(); - let new_pc_ = proc.func(); - event.log_arch_changes(spike, self.vlen).unwrap(); - new_pc_ - } else { - // inst is scalar - debug!( - "SpikeStep: spike run scalar insn (pc={:#x}, disasm={}, bits={:#x})", - pc, disasm, insn_bits, - ); - let new_pc_ = proc.func(); - event.log_mem_write(spike).unwrap(); - new_pc_ - }; + // inst is scalar + debug!("SpikeStep: spike run scalar insn ({})", event.describe_insn()); + let new_pc = proc.func(); + event.log_mem_write(spike).unwrap(); + event.log_reg_write(spike).unwrap(); state.handle_pc(new_pc).unwrap(); @@ 
-126,20 +94,4 @@ impl SpikeRunner { event } - - pub fn find_v_se_to_issue(&mut self) -> SpikeEvent { - if !self.commit_queue.is_empty() && self.commit_queue.front().unwrap().is_vfence() { - // if the front (latest) se is a vfence, return the vfence - self.commit_queue.front().unwrap().clone() - } else { - // else, loop until find a se, and push the se to the front - loop { - let se = self.spike_step(); - if se.is_v() { - self.commit_queue.push_front(se.clone()); - break se.clone(); - } - } - } - } } From d1caf73cf4e7ced12b9e648b5ab1b63b16d9b578 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 6 Aug 2024 11:53:41 +0800 Subject: [PATCH 116/140] [nix] refactor the rocketv subattr Signed-off-by: Avimitin --- nix/t1/default.nix | 4 - rocketemu/default.nix | 116 ++++++++++-------- rocketemu/dpi/default.nix | 8 +- .../nix/mlirbc.nix | 3 +- .../rocketv-rtl.nix => rocketemu/nix/rtl.nix | 4 +- .../nix/verilated-csrc.nix | 4 +- .../configs/{RocketTile.json => default.json} | 0 7 files changed, 77 insertions(+), 62 deletions(-) rename nix/t1/rocketv-mlirbc.nix => rocketemu/nix/mlirbc.nix (91%) rename nix/t1/rocketv-rtl.nix => rocketemu/nix/rtl.nix (84%) rename nix/t1/rocketv-verilated-csrc.nix => rocketemu/nix/verilated-csrc.nix (97%) rename rocketv/configs/{RocketTile.json => default.json} (100%) diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 12c6e68db..2d5829d42 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -35,10 +35,6 @@ lib.makeScope newScope configgen = _millOutput.configgen // { meta.mainProgram = "configgen"; }; t1package = _millOutput.t1package; - # FIXME: move all the rocketv file to an individual directory and put all attribute into one scope. 
- rocketv-mlirbc = self.callPackage ./rocketv-mlirbc.nix { }; - rocketv-rtl = self.callPackage ./rocketv-rtl.nix { }; - rocketv-verilated-csrc = self.callPackage ./rocketv-verilated-csrc.nix { }; rocketv = self.callPackage ../../rocketemu { }; omreader-unwrapped = self.callPackage ./omreader.nix { }; diff --git a/rocketemu/default.nix b/rocketemu/default.nix index cb5c34a1e..99d9e3a58 100644 --- a/rocketemu/default.nix +++ b/rocketemu/default.nix @@ -3,56 +3,74 @@ , rustPlatform , libspike , zlib -, rocketv-verilated-csrc }: -lib.makeScope newScope (scope: rec { - c-dpi-lib = scope.callPackage ./dpi { }; - - # FIXME: merge with difftest and put it under the nix/pkgs - spike_interfaces = scope.callPackage ../difftest/spike_interfaces { }; - - emu = rustPlatform.buildRustPackage { - name = "rocketemu"; - - src = with lib.fileset; toSource { - root = ./.; - fileset = unions [ - ./driver - ./offline - ./spike_rs - ./test_common - ./Cargo.lock - ./Cargo.toml - ]; - }; - - buildInputs = [ - zlib - spike_interfaces - ]; - - # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? 
- env = - let - toLib = drv: "${drv}/lib"; - in - { - ROCKET_DPI_DIR = toLib c-dpi-lib; - TESTBENCH_LIB_DIR = toLib rocketv-verilated-csrc; - SPIKE_LIB_DIR = toLib libspike; - SPIKE_INTERFACES_LIB_DIR = toLib spike_interfaces; - }; +let + configsDirectory = ../rocketv/configs; + # allConfigs is a (configName -> configJsonPath) map + allConfigs = lib.mapAttrs' + (fileName: fileType: + assert fileType == "regular" && lib.hasSuffix ".json" fileName; + lib.nameValuePair + (lib.removeSuffix ".json" fileName) + (lib.path.append configsDirectory fileName)) + (builtins.readDir configsDirectory); +in +lib.mapAttrs + (configName: configPath: ( + lib.makeScope newScope (scope: rec { + rocket-config = configPath; + mlirbc = scope.callPackage ./nix/mlirbc.nix { }; + rtl = scope.callPackage ./nix/rtl.nix { }; + verilated-csrc = scope.callPackage ./nix/verilated-csrc.nix { }; + + c-dpi-lib = scope.callPackage ./dpi { }; + + # FIXME: merge with difftest and put it under the nix/pkgs + spike_interfaces = scope.callPackage ../difftest/spike_interfaces { }; + + emu = rustPlatform.buildRustPackage { + name = "rocketemu"; - cargoLock = { - lockFile = ./Cargo.lock; - }; + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./driver + ./offline + ./spike_rs + ./test_common + ./Cargo.lock + ./Cargo.toml + ]; + }; - outputs = [ "out" "driver" "offline" ]; + buildInputs = [ + zlib + spike_interfaces + ]; - postInstall = '' - mkdir -p $driver/bin $offline/bin - ln -s $out/bin/driver $driver/bin/driver - ln -s $out/bin/offline $driver/bin/offline - ''; - }; -}) + env = + let + toLib = drv: "${drv}/lib"; + in + { + ROCKET_DPI_DIR = toLib c-dpi-lib; + TESTBENCH_LIB_DIR = toLib verilated-csrc; + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib spike_interfaces; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + 
ln -s $out/bin/offline $driver/bin/offline + ''; + }; + }) + )) # end of mapAttr + allConfigs diff --git a/rocketemu/dpi/default.nix b/rocketemu/dpi/default.nix index f8714b4b6..6c4e1faff 100644 --- a/rocketemu/dpi/default.nix +++ b/rocketemu/dpi/default.nix @@ -3,7 +3,7 @@ , stdenv , cmake , ninja -, rocketv-verilated-csrc +, verilated-csrc }: stdenv.mkDerivation { name = "rocketv-emulator"; @@ -16,12 +16,12 @@ stdenv.mkDerivation { verilator ]; - cmakeFlags = lib.optionals rocketv-verilated-csrc.enable-trace [ + cmakeFlags = lib.optionals verilated-csrc.enable-trace [ "-DVM_TRACE=ON" ]; env = { - VERILATED_INC_DIR = "${rocketv-verilated-csrc}/include"; - VERILATED_LIB_DIR = "${rocketv-verilated-csrc}/lib"; + VERILATED_INC_DIR = "${verilated-csrc}/include"; + VERILATED_LIB_DIR = "${verilated-csrc}/lib"; }; } diff --git a/nix/t1/rocketv-mlirbc.nix b/rocketemu/nix/mlirbc.nix similarity index 91% rename from nix/t1/rocketv-mlirbc.nix rename to rocketemu/nix/mlirbc.nix index c01bd503a..65a25e503 100644 --- a/nix/t1/rocketv-mlirbc.nix +++ b/rocketemu/nix/mlirbc.nix @@ -4,6 +4,7 @@ , circt , elaborator +, rocket-config }: stdenvNoCC.mkDerivation { name = "t1-rocketv-elaborated.mlirbc"; @@ -12,7 +13,7 @@ stdenvNoCC.mkDerivation { buildCommand = '' mkdir elaborate - elaborator rocketemu --target-dir elaborate --rocket-config ${../../rocketv/configs/RocketTile.json} + elaborator rocketemu --target-dir elaborate --rocket-config ${rocket-config} firtool elaborate/*.fir \ --annotation-file elaborate/*.anno.json \ --emit-bytecode \ diff --git a/nix/t1/rocketv-rtl.nix b/rocketemu/nix/rtl.nix similarity index 84% rename from nix/t1/rocketv-rtl.nix rename to rocketemu/nix/rtl.nix index f67521627..053078f82 100644 --- a/nix/t1/rocketv-rtl.nix +++ b/rocketemu/nix/rtl.nix @@ -2,7 +2,7 @@ , lib , circt -, rocketv-mlirbc +, mlirbc }: let @@ -21,6 +21,6 @@ stdenvNoCC.mkDerivation { buildCommand = '' mkdir -p $out - firtool ${rocketv-mlirbc} ${mfcArgs} -o $out + firtool ${mlirbc} 
${mfcArgs} -o $out ''; } diff --git a/nix/t1/rocketv-verilated-csrc.nix b/rocketemu/nix/verilated-csrc.nix similarity index 97% rename from nix/t1/rocketv-verilated-csrc.nix rename to rocketemu/nix/verilated-csrc.nix index eb5b4127f..f32ade7af 100644 --- a/nix/t1/rocketv-verilated-csrc.nix +++ b/rocketemu/nix/verilated-csrc.nix @@ -1,7 +1,7 @@ { lib , fetchgit , stdenv -, rocketv-rtl +, rtl , verilator , enable-trace ? true , zlib @@ -21,7 +21,7 @@ in stdenv.mkDerivation { name = "t1-rocketv-verilated"; - src = rocketv-rtl; + src = rtl; nativeBuildInputs = [ verilator ]; diff --git a/rocketv/configs/RocketTile.json b/rocketv/configs/default.json similarity index 100% rename from rocketv/configs/RocketTile.json rename to rocketv/configs/default.json From 5c50fbb1761d135064a973af5c81db3cb79c40c2 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Tue, 6 Aug 2024 12:18:56 +0800 Subject: [PATCH 117/140] [nix] rename default config to "meowth" Signed-off-by: Avimitin --- rocketv/configs/{default.json => meowth.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rocketv/configs/{default.json => meowth.json} (100%) diff --git a/rocketv/configs/default.json b/rocketv/configs/meowth.json similarity index 100% rename from rocketv/configs/default.json rename to rocketv/configs/meowth.json From 184f21ed7165465d3f8486ff5355a0057a1ee9b0 Mon Sep 17 00:00:00 2001 From: SharzyL Date: Fri, 9 Aug 2024 10:43:37 +0800 Subject: [PATCH 118/140] [difftest] fix rust warnings --- difftest/online_dpi/src/dpi.rs | 5 +++-- difftest/online_dpi/src/drive.rs | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/difftest/online_dpi/src/dpi.rs b/difftest/online_dpi/src/dpi.rs index 9becfc1e0..2ea445d8e 100644 --- a/difftest/online_dpi/src/dpi.rs +++ b/difftest/online_dpi/src/dpi.rs @@ -2,7 +2,7 @@ #![allow(unused_variables)] use clap::Parser; -use std::ffi::{c_char, c_longlong, CString}; +use std::ffi::{c_char, c_longlong}; use std::sync::Mutex; use tracing::debug; @@ 
-271,10 +271,10 @@ unsafe extern "C" fn retire_vector_mem(dummy: *const SvBitVecVal) { // import functions and wrappers //-------------------------------- +#[cfg(feature = "trace")] mod dpi_export { use std::ffi::c_char; extern "C" { - #[cfg(feature = "trace")] /// `export "DPI-C" function dump_wave(input string file)` pub fn dump_wave(path: *const c_char); } @@ -283,6 +283,7 @@ mod dpi_export { #[cfg(feature = "trace")] pub(crate) fn dump_wave(scope: crate::svdpi::SvScope, path: &str) { use crate::svdpi; + use std::ffi::CString; let path_cstring = CString::new(path).unwrap(); svdpi::set_scope(scope); diff --git a/difftest/online_dpi/src/drive.rs b/difftest/online_dpi/src/drive.rs index 5615b157b..5cc9fb78b 100644 --- a/difftest/online_dpi/src/drive.rs +++ b/difftest/online_dpi/src/drive.rs @@ -99,6 +99,7 @@ pub(crate) struct Driver { spike_runner: SpikeRunner, // SvScope from t1_cosim_init + #[cfg(feature = "trace")] scope: SvScope, #[cfg(feature = "trace")] @@ -161,8 +162,9 @@ impl Driver { let mut self_ = Self { spike_runner: SpikeRunner::new(&args.common_args, false), - scope, + #[cfg(feature = "trace")] + scope, #[cfg(feature = "trace")] wave_path: args.wave_path.to_owned(), #[cfg(feature = "trace")] From a146489b8e528ac38dceb5d1add2212388403112 Mon Sep 17 00:00:00 2001 From: SharzyL Date: Fri, 9 Aug 2024 10:43:51 +0800 Subject: [PATCH 119/140] [script] fix nix attribute --- script/emu/src/Main.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/emu/src/Main.scala b/script/emu/src/Main.scala index 4411b8cf1..c8bcec37f 100644 --- a/script/emu/src/Main.scala +++ b/script/emu/src/Main.scala @@ -62,7 +62,7 @@ object Main: "--no-link", "--print-out-paths", "--no-warn-dirty", - s".#t1.${config}.${caseAttrRoot}.${caseName}" + s".#t1.${config}.ip.${caseAttrRoot}.${caseName}" ) Logger.trace( s"Running `${nixArgs.mkString(" ")}` to get test case ELF file" @@ -80,7 +80,7 @@ object Main: ): os.Path = // FIXME: replace with actual trace 
emulator here val target = - if (isTrace) then s"${emuType}.difftest" else s"${emuType}.difftest" + if (isTrace) then s"${emuType}.verilator-emu" else s"${emuType}.verilator-emu" val nixArgs = Seq( "nix", "build", From 6f6486e25e1f3cb6599ccc9d2e70fd52703cbb81 Mon Sep 17 00:00:00 2001 From: SharzyL Date: Fri, 12 Jul 2024 12:06:39 +0800 Subject: [PATCH 120/140] [cases] rvv_bench built from upstream source --- difftest/online_dpi/src/dpi.rs | 2 +- tests/default.nix | 10 +- tests/rvv_bench/_include/bench.h | 170 -------- tests/rvv_bench/_include/config.h | 25 -- tests/rvv_bench/_include/nolibc.h | 80 ---- tests/rvv_bench/_include/template.S | 80 ---- tests/rvv_bench/_include/thirdparty/boring.c | 383 ------------------ tests/rvv_bench/_include/thirdparty/boring.h | 31 -- .../_include/thirdparty/rvv-rollback.S | 255 ------------ .../rvv_bench/ascii_to_utf16/ascii_to_utf16.S | 68 ---- .../rvv_bench/ascii_to_utf16/ascii_to_utf16.c | 63 --- .../rvv_bench/ascii_to_utf32/ascii_to_utf32.S | 66 --- .../rvv_bench/ascii_to_utf32/ascii_to_utf32.c | 63 --- tests/rvv_bench/byteswap/byteswap.S | 81 ---- tests/rvv_bench/byteswap/byteswap.c | 79 ---- tests/rvv_bench/chacha20/chacha20.S | 5 - tests/rvv_bench/chacha20/chacha20.c | 61 --- tests/rvv_bench/default.nix | 55 ++- tests/rvv_bench/mandelbrot/mandelbrot.S | 358 ---------------- tests/rvv_bench/mandelbrot/mandelbrot.c | 94 ----- tests/rvv_bench/memcpy/memcpy.S | 153 ------- tests/rvv_bench/memcpy/memcpy.c | 197 --------- tests/rvv_bench/memset/memset.S | 96 ----- tests/rvv_bench/memset/memset.c | 163 -------- tests/rvv_bench/mergelines/mergelines.S | 179 -------- tests/rvv_bench/mergelines/mergelines.c | 75 ---- tests/rvv_bench/poly1305/poly1305.S | 5 - tests/rvv_bench/poly1305/poly1305.c | 64 --- tests/rvv_bench/strlen/strlen.S | 91 ----- tests/rvv_bench/strlen/strlen.c | 76 ---- tests/rvv_bench/t1_runtime.patch | 102 +++++ tests/rvv_bench/utf8_count/utf8_count.S | 213 ---------- tests/rvv_bench/utf8_count/utf8_count.c | 135 
------ 33 files changed, 150 insertions(+), 3428 deletions(-) delete mode 100644 tests/rvv_bench/_include/bench.h delete mode 100644 tests/rvv_bench/_include/config.h delete mode 100644 tests/rvv_bench/_include/nolibc.h delete mode 100644 tests/rvv_bench/_include/template.S delete mode 100644 tests/rvv_bench/_include/thirdparty/boring.c delete mode 100644 tests/rvv_bench/_include/thirdparty/boring.h delete mode 100644 tests/rvv_bench/_include/thirdparty/rvv-rollback.S delete mode 100644 tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S delete mode 100644 tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c delete mode 100644 tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S delete mode 100644 tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c delete mode 100644 tests/rvv_bench/byteswap/byteswap.S delete mode 100644 tests/rvv_bench/byteswap/byteswap.c delete mode 100644 tests/rvv_bench/chacha20/chacha20.S delete mode 100644 tests/rvv_bench/chacha20/chacha20.c delete mode 100644 tests/rvv_bench/mandelbrot/mandelbrot.S delete mode 100644 tests/rvv_bench/mandelbrot/mandelbrot.c delete mode 100644 tests/rvv_bench/memcpy/memcpy.S delete mode 100644 tests/rvv_bench/memcpy/memcpy.c delete mode 100644 tests/rvv_bench/memset/memset.S delete mode 100644 tests/rvv_bench/memset/memset.c delete mode 100644 tests/rvv_bench/mergelines/mergelines.S delete mode 100644 tests/rvv_bench/mergelines/mergelines.c delete mode 100644 tests/rvv_bench/poly1305/poly1305.S delete mode 100644 tests/rvv_bench/poly1305/poly1305.c delete mode 100644 tests/rvv_bench/strlen/strlen.S delete mode 100644 tests/rvv_bench/strlen/strlen.c create mode 100644 tests/rvv_bench/t1_runtime.patch delete mode 100644 tests/rvv_bench/utf8_count/utf8_count.S delete mode 100644 tests/rvv_bench/utf8_count/utf8_count.c diff --git a/difftest/online_dpi/src/dpi.rs b/difftest/online_dpi/src/dpi.rs index 2ea445d8e..464e007a9 100644 --- a/difftest/online_dpi/src/dpi.rs +++ b/difftest/online_dpi/src/dpi.rs @@ -47,7 +47,7 @@ unsafe fn 
load_from_payload<'a>( let data = &byte_vec[strb_width_in_byte..]; let strb_width_in_bit = std::cmp::min(8, data_width_in_byte); - let mut masks: Vec = strobe + let masks: Vec = strobe .into_iter() .flat_map(|strb| { let mask: Vec = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); diff --git a/tests/default.nix b/tests/default.nix index fe062d366..36e5c64c9 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -45,6 +45,11 @@ let builtins.fromJSON (lib.fileContents extraFeatures) else [ ]; + filterByFeatures = caseName: caseDrv: + assert lib.assertMsg (caseDrv ? featuresRequired) "${caseName} doesn't have features specified"; + # Test the case required extensions is supported by rtl design + isSubsetOf currentFeatures caseDrv.featuresRequired; + findAndBuild = dir: build: lib.recurseIntoAttrs (lib.pipe (builtins.readDir dir) [ # filter out all non-directory entrires and underscore-prefixed directories @@ -60,10 +65,7 @@ let inherit caseName sourcePath; }) ) - (lib.filterAttrs (caseName: caseDrv: - assert lib.assertMsg (caseDrv ? 
featuresRequired) "${caseName} doesn't have features specified"; - # Test the case required extensions is supported by rtl design - isSubsetOf currentFeatures caseDrv.featuresRequired)) + (lib.filterAttrs casesSelf.filterByFeatures) ]); t1main = ./t1_main.S; linkerScript = ./t1.ld; diff --git a/tests/rvv_bench/_include/bench.h b/tests/rvv_bench/_include/bench.h deleted file mode 100644 index 126346d4a..000000000 --- a/tests/rvv_bench/_include/bench.h +++ /dev/null @@ -1,170 +0,0 @@ -#include "config.h" -#include "nolibc.h" - -#ifndef BENCH_NEXT - #define BENCH_NEXT NEXT -#endif - -#define MX(f, F) f(F##_m1) f(F##_m2) f(F##_m4) f(F##_m8) -#define STR(x) STR_(x) -#define STR_(x) #x - -#define ROTL(x, n) (((x) << (n)) | ((x) >> (8 * sizeof(x) - (n)))) - -#if defined(__clang__) || defined(__GNUC__) || defined(__INTEL_COMPILER) - -#define BENCH_CLOBBER() ({ __asm volatile("" ::: "memory"); }) -#define BENCH_VOLATILE(x) \ - ({ __asm volatile("" : "+g"(x) : "g"(x) : "memory"); }) -#define BENCH_VOLATILE_REG(x) \ - ({ __asm volatile("" : "+r"(x) : "r"(x) : "memory"); }) -#define BENCH_VOLATILE_MEM(x) \ - ({ __asm volatile("" : "+m"(x) : "m"(x) : "memory"); }) -#define BENCH_FENCE() ({ __asm volatile("fence.i"); }) - -#define BENCH_MAY_ALIAS __attribute__((__may_alias__)) - -#else - -#define BENCH_CLOBBER() -#define BENCH_CLOBBER_WITH(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -#define BENCH_CLOBBER_WITH_REG(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -#define BENCH_CLOBBER_WITH_MEM(x) (bench__use_ptr(&(x)), BENCH_CLOBBER()) -static void bench_use_ptr(char const volatile *x) {} - -#define BENCH_MAY_ALIAS - -#endif - -static int compare_ux(void const *a, void const *b) { - ux A = *(ux *)a, B = *(ux *)b; - return A < B ? -1 : A > B ? 
1 : 0; -} - -typedef struct { - ux x, y, z; -} RandState; -static RandState randState = {123, 456, 789}; - -/* RomuDuoJr, see https://romu-random.org/ */ -static ux urand(void) { - ux xp = randState.x, yp = randState.y, zp = randState.z; - randState.x = 3323815723u * zp; - randState.y = ROTL(yp - xp, 6); - randState.z = ROTL(zp - yp, 22); - return xp; -} - -typedef struct { - char const *name; - void *func; -} Impl; -typedef struct { - size_t N; - char const *name; - ux (*func)(void *, size_t); -} Bench; - -static unsigned char *mem = 0; - -void bench_main(void); -ux checksum(size_t n); -void init(void); - -static void memrand(void *ptr, size_t n) { - unsigned char *p = ptr; -#ifdef __GNUC__ - typedef ux __attribute__((__may_alias__)) uxa; - for (; n && (uintptr_t)p % sizeof(uxa); --n) - *p++ = urand(); - uxa *px = (uxa *)p; - for (; n > sizeof(ux); n -= sizeof(ux)) - *px++ = urand(); - p = (unsigned char *)px; -#endif - while (n--) - *p++ = urand(); -} - -#if __STDC_HOSTED__ -#include -#else -static ux heap[1 + MAX_MEM / sizeof(ux)]; -#endif - -int test(void) { - -#if __STDC_HOSTED__ - mem = malloc(MAX_MEM); -#else - mem = (unsigned char *)heap; -#endif - - size_t x; - randState.x ^= rv_cycles() * 7; - randState.y += rv_cycles() ^ (uintptr_t)&x + 666 * (uintptr_t)mem; - - /* initialize memory */ - memrand(mem, MAX_MEM); - - init(); - bench_main(); -#if __STDC_HOSTED__ - free(mem); -#endif - return 0; -} - -static fx bench_time(size_t n, Impl impl, Bench bench) { - static ux arr[MAX_REPEATS]; - size_t total = 0, repeats = 0; - for (; repeats < MAX_REPEATS; ++repeats) { - total += arr[repeats] = bench.func(impl.func, n); - if (repeats > MIN_REPEATS && total > STOP_CYCLES) - break; - } -#if MAX_REPEATS > 4 - qsort(arr, repeats, sizeof *arr, compare_ux); - ux sum = 0, count = 0; - for (size_t i = repeats * 0.2f; i < repeats * 0.8f; ++i, ++count) - sum += arr[i]; -#else - ux sum = 0, count = repeats; - for (size_t i = 0; i < repeats; ++i) - sum += arr[i]; -#endif - 
return n / ((fx)sum / count); -} - -static void bench_run(size_t nImpls, Impl *impls, size_t nBenches, - Bench *benches) { - for (Bench *b = benches; b != benches + nBenches; ++b) { - size_t N = b->N; - for (Impl *i = impls; i != impls + nImpls; ++i) { - printf("["); - for (size_t n = 1; n < N; n = BENCH_NEXT(n)) { - ux si = 0, s0 = 0; - printf("%f, ", bench_time(n, *i, *b)); - } - printf("],\n"); - } - printf("]\n},\n"); - } -} - -#define TIME \ - for (ux beg = rv_cycles(), _once = 1; _once; \ - BENCH_FENCE(), _cycles += rv_cycles() - beg, _once = 0) - -#define BENCH(name) \ - ux bench_##name(void *_func, size_t n) { \ - Func *f = _func; \ - ux _cycles = 0; -#define BENCH_END \ - return _cycles; \ - } - -#define BENCH_MAIN(impls, benches) \ - void bench_main(void) { \ - bench_run(ARR_LEN(impls), impls, ARR_LEN(benches), benches); \ - } diff --git a/tests/rvv_bench/_include/config.h b/tests/rvv_bench/_include/config.h deleted file mode 100644 index 44f1009b0..000000000 --- a/tests/rvv_bench/_include/config.h +++ /dev/null @@ -1,25 +0,0 @@ -/* processor specific configs */ -#define HAS_E64 (__riscv_v_elen >= 64) -#define HAS_F16 0 - -/* the maximum number of bytes to allocate, minimum of 4096 */ -#define MAX_MEM (4096 * 8) -/* the byte count for the next run */ -#define NEXT(c) (c + c / 3 + 3) - -/* minimum number of repeats, to sample median from */ -#define MIN_REPEATS 1 -/* maxium number of repeats, executed until more than STOP_TIME has elapsed */ -#define MAX_REPEATS 1 - -/* stop repeats early afer this many cycles have elapsed */ -#define STOP_CYCLES (1024 * 1024 * 500) - -/* custom scaling factors for benchmarks, these are used to make sure each - * benchmark approximately takes the same amount of time. 
*/ - -#define SCALE_mandelbrot(N) ((N) / 10) -#define SCALE_mergelines(N) ((N) / 10) - -/* benchmark specific configurations */ -#define mandelbrot_ITER 100 diff --git a/tests/rvv_bench/_include/nolibc.h b/tests/rvv_bench/_include/nolibc.h deleted file mode 100644 index 88f31d136..000000000 --- a/tests/rvv_bench/_include/nolibc.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#if __riscv_xlen == 32 -typedef uint32_t ux; -typedef float fx; -#define IF64(...) -#elif __riscv_xlen == 64 -typedef uint64_t ux; -typedef double fx; -#define IF64(...) __VA_ARGS__ -#else -#error "unsupported XLEN" -#endif -#define ARR_LEN(x) (sizeof x / sizeof *(x)) - -static void memwrite(void const *ptr, size_t len) { - fwrite(ptr, 1, len, stdout); -} - -static size_t memread(void *ptr, size_t len) { - return fread(ptr, 1, len, stdin); -} - -static inline ux rv_cycles(void) { - ux cycle; - __asm volatile("csrr %0, mcycle" : "=r"(cycle)); - return cycle; -} - -static void memswap(void *a, void *b, size_t size) { - unsigned char *A = (unsigned char *)a, *B = (unsigned char *)b; - unsigned char *aEnd = A + size; - while (A < aEnd) { - unsigned char temp = *A; - *A++ = *B; - *B++ = temp; - } -} - -static ux usqrt(ux y) { - ux L = 0, R = y + 1; - while (L != R - 1) { - ux M = (L + R) / 2; - if (M * M <= y) - L = M; - else - R = M; - } - return L; -} - -static ux uhash(ux x) { -#if __riscv_xlen == 32 - /* MurmurHash3 32-bit finalizer */ - x ^= x >> 16; - x *= 0x85ebca6b; - x ^= x >> 13; - x *= 0xc2b2ae35; - x ^= x >> 16; -#else - /* splitmix64 finalizer */ - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9U; - x ^= x >> 27; - x *= 0x94d049bb133111ebU; - x ^= x >> 31; -#endif - return x; -} - -#define IFHOSTED(...) 
__VA_ARGS__ diff --git a/tests/rvv_bench/_include/template.S b/tests/rvv_bench/_include/template.S deleted file mode 100644 index eabdd5017..000000000 --- a/tests/rvv_bench/_include/template.S +++ /dev/null @@ -1,80 +0,0 @@ -#define HAS_RVV_1_0 1 -#include "config.h" -.text -.balign 8 - -#define CAT_(a,b) a##b -#define CAT(a,b) CAT_(a,b) - -#define STR(x) #x -#define STRe(x) STR(x) - -#define MX_N 0 -#include STRe(INC) - -#undef MX_N - -#define MX_N 1 -#define MX8(x) x##m8 -#define MX4(x) x##m4 -#define MX2(x) x##m2 -#define MX(x) x##m1 -#if HAS_RVV_1_0 -#define MXf2(x) x##mf2 -#define MXf4(x) x##mf4 -# define MXf8(x) x##mf8 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX8 -#undef MX4 -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 2 -#define MX4(x) x##m8 -#define MX2(x) x##m4 -#define MX(x) x##m2 -#define MXf2(x) x##m1 -#if HAS_RVV_1_0 -#define MXf4(x) x##mf2 -# define MXf8(x) x##mf4 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX4 -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 4 -#define MX2(x) x##m8 -#define MX(x) x##m4 -#define MXf2(x) x##m2 -#define MXf4(x) x##m1 -#if HAS_RVV_1_0 -# define MXf8(x) x##mf2 -#endif -#include STRe(INC) - -#undef MX_N -#undef MX2 -#undef MX -#undef MXf2 -#undef MXf4 -#undef MXf8 - -#define MX_N 8 -#define MX(x) x##m8 -#define MXf2(x) x##m4 -#define MXf4(x) x##m2 -#define MXf8(x) x##m1 -#include STRe(INC) - diff --git a/tests/rvv_bench/_include/thirdparty/boring.c b/tests/rvv_bench/_include/thirdparty/boring.c deleted file mode 100644 index e7cea237e..000000000 --- a/tests/rvv_bench/_include/thirdparty/boring.c +++ /dev/null @@ -1,383 +0,0 @@ -/* Copyright (c) 2014, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -// Adapted from the public domain, estream code by D. Bernstein. - -#include "boring.h" - - -extern void *memcpy(void *restrict dest, void const *restrict src, size_t n); - -#define U8TO32_LITTLE(p) \ - (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ - ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) - -// sigma contains the ChaCha constants, which happen to be an ASCII string. -static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', - '2', '-', 'b', 'y', 't', 'e', ' ', 'k' }; - -#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) - -// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. -#define QUARTERROUND(a, b, c, d) \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 16); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 7); - -#define U32TO8_LITTLE(p, v) \ - { \ - (p)[0] = (v >> 0) & 0xff; \ - (p)[1] = (v >> 8) & 0xff; \ - (p)[2] = (v >> 16) & 0xff; \ - (p)[3] = (v >> 24) & 0xff; \ - } - -// chacha_core performs 20 rounds of ChaCha on the input words in -// |input| and writes the 64 output bytes to |output|. 
-static void chacha_core(uint8_t output[64], const uint32_t input[16]) { - uint32_t x[16]; - int i; - - memcpy(x, input, sizeof(uint32_t) * 16); - for (i = 20; i > 0; i -= 2) { - QUARTERROUND(0, 4, 8, 12) - QUARTERROUND(1, 5, 9, 13) - QUARTERROUND(2, 6, 10, 14) - QUARTERROUND(3, 7, 11, 15) - QUARTERROUND(0, 5, 10, 15) - QUARTERROUND(1, 6, 11, 12) - QUARTERROUND(2, 7, 8, 13) - QUARTERROUND(3, 4, 9, 14) - } - - for (i = 0; i < 16; ++i) { - x[i] += input[i]; - } - for (i = 0; i < 16; ++i) { - U32TO8_LITTLE(output + 4 * i, x[i]); - } -} - -void boring_chacha20(uint8_t *out, const uint8_t *in, size_t in_len, - const uint8_t key[32], const uint8_t nonce[12], - uint32_t counter) { - - uint32_t input[16]; - uint8_t buf[64]; - size_t todo, i; - - input[0] = U8TO32_LITTLE(sigma + 0); - input[1] = U8TO32_LITTLE(sigma + 4); - input[2] = U8TO32_LITTLE(sigma + 8); - input[3] = U8TO32_LITTLE(sigma + 12); - - input[4] = U8TO32_LITTLE(key + 0); - input[5] = U8TO32_LITTLE(key + 4); - input[6] = U8TO32_LITTLE(key + 8); - input[7] = U8TO32_LITTLE(key + 12); - - input[8] = U8TO32_LITTLE(key + 16); - input[9] = U8TO32_LITTLE(key + 20); - input[10] = U8TO32_LITTLE(key + 24); - input[11] = U8TO32_LITTLE(key + 28); - - input[12] = counter; - input[13] = U8TO32_LITTLE(nonce + 0); - input[14] = U8TO32_LITTLE(nonce + 4); - input[15] = U8TO32_LITTLE(nonce + 8); - - while (in_len > 0) { - todo = sizeof(buf); - if (in_len < todo) { - todo = in_len; - } - - chacha_core(buf, input); - for (i = 0; i < todo; i++) { - out[i] = in[i] ^ buf[i]; - } - - out += todo; - in += todo; - in_len -= todo; - - input[12]++; - } -} - -///// poly1305 - -static uint32_t U8TO32_LE(const uint8_t *m) { - uint32_t r; - memcpy(&r, m, sizeof(r)); - return r; -} - -static void U32TO8_LE(uint8_t *m, uint32_t v) { - memcpy(m, &v, sizeof(v)); -} - - -static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; } - -struct poly1305_state_st { - uint32_t r0, r1, r2, r3, r4; - uint32_t s1, s2, s3, s4; - uint32_t 
h0, h1, h2, h3, h4; - uint8_t buf[16]; - unsigned int buf_used; - uint8_t key[16]; -}; - -static inline struct poly1305_state_st *poly1305_aligned_state( - poly1305_state *state) { - return (struct poly1305_state_st *)(((uintptr_t)state + 63) & ~63); -} - -static void poly1305_update(struct poly1305_state_st *state, const uint8_t *in, - size_t len) { - uint32_t t0, t1, t2, t3; - uint64_t t[5]; - uint32_t b; - uint64_t c; - size_t j; - uint8_t mp[16]; - - if (len < 16) { - goto poly1305_donna_atmost15bytes; - } - - poly1305_donna_16bytes: - t0 = U8TO32_LE(in); - t1 = U8TO32_LE(in + 4); - t2 = U8TO32_LE(in + 8); - t3 = U8TO32_LE(in + 12); - - in += 16; - len -= 16; - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8) | (1 << 24); - - poly1305_donna_mul: - t[0] = mul32x32_64(state->h0, state->r0) + mul32x32_64(state->h1, state->s4) + - mul32x32_64(state->h2, state->s3) + mul32x32_64(state->h3, state->s2) + - mul32x32_64(state->h4, state->s1); - t[1] = mul32x32_64(state->h0, state->r1) + mul32x32_64(state->h1, state->r0) + - mul32x32_64(state->h2, state->s4) + mul32x32_64(state->h3, state->s3) + - mul32x32_64(state->h4, state->s2); - t[2] = mul32x32_64(state->h0, state->r2) + mul32x32_64(state->h1, state->r1) + - mul32x32_64(state->h2, state->r0) + mul32x32_64(state->h3, state->s4) + - mul32x32_64(state->h4, state->s3); - t[3] = mul32x32_64(state->h0, state->r3) + mul32x32_64(state->h1, state->r2) + - mul32x32_64(state->h2, state->r1) + mul32x32_64(state->h3, state->r0) + - mul32x32_64(state->h4, state->s4); - t[4] = mul32x32_64(state->h0, state->r4) + mul32x32_64(state->h1, state->r3) + - mul32x32_64(state->h2, state->r2) + mul32x32_64(state->h3, state->r1) + - mul32x32_64(state->h4, state->r0); - - state->h0 = (uint32_t)t[0] & 0x3ffffff; - c = (t[0] >> 26); - t[1] += c; - 
state->h1 = (uint32_t)t[1] & 0x3ffffff; - b = (uint32_t)(t[1] >> 26); - t[2] += b; - state->h2 = (uint32_t)t[2] & 0x3ffffff; - b = (uint32_t)(t[2] >> 26); - t[3] += b; - state->h3 = (uint32_t)t[3] & 0x3ffffff; - b = (uint32_t)(t[3] >> 26); - t[4] += b; - state->h4 = (uint32_t)t[4] & 0x3ffffff; - b = (uint32_t)(t[4] >> 26); - state->h0 += b * 5; - - if (len >= 16) { - goto poly1305_donna_16bytes; - } - - // final bytes - poly1305_donna_atmost15bytes: - if (!len) { - return; - } - - for (j = 0; j < len; j++) { - mp[j] = in[j]; - } - mp[j++] = 1; - for (; j < 16; j++) { - mp[j] = 0; - } - len = 0; - - t0 = U8TO32_LE(mp + 0); - t1 = U8TO32_LE(mp + 4); - t2 = U8TO32_LE(mp + 8); - t3 = U8TO32_LE(mp + 12); - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8); - - goto poly1305_donna_mul; -} - -void boring_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { - struct poly1305_state_st *state = poly1305_aligned_state(statep); - uint32_t t0, t1, t2, t3; - - t0 = U8TO32_LE(key + 0); - t1 = U8TO32_LE(key + 4); - t2 = U8TO32_LE(key + 8); - t3 = U8TO32_LE(key + 12); - - // precompute multipliers - state->r0 = t0 & 0x3ffffff; - t0 >>= 26; - t0 |= t1 << 6; - state->r1 = t0 & 0x3ffff03; - t1 >>= 20; - t1 |= t2 << 12; - state->r2 = t1 & 0x3ffc0ff; - t2 >>= 14; - t2 |= t3 << 18; - state->r3 = t2 & 0x3f03fff; - t3 >>= 8; - state->r4 = t3 & 0x00fffff; - - state->s1 = state->r1 * 5; - state->s2 = state->r2 * 5; - state->s3 = state->r3 * 5; - state->s4 = state->r4 * 5; - - // init state - state->h0 = 0; - state->h1 = 0; - state->h2 = 0; - state->h3 = 0; - state->h4 = 0; - - state->buf_used = 0; - memcpy(state->key, key + 16, sizeof(state->key)); -} - -void boring_poly1305_update(poly1305_state *statep, const uint8_t *in, - size_t in_len) { - unsigned int i; - struct poly1305_state_st 
*state = poly1305_aligned_state(statep); - - if (state->buf_used) { - unsigned todo = 16 - state->buf_used; - if (todo > in_len) { - todo = (unsigned)in_len; - } - for (i = 0; i < todo; i++) { - state->buf[state->buf_used + i] = in[i]; - } - state->buf_used += todo; - in_len -= todo; - in += todo; - - if (state->buf_used == 16) { - poly1305_update(state, state->buf, 16); - state->buf_used = 0; - } - } - - if (in_len >= 16) { - size_t todo = in_len & ~0xf; - poly1305_update(state, in, todo); - in += todo; - in_len &= 0xf; - } - - if (in_len) { - for (i = 0; i < in_len; i++) { - state->buf[i] = in[i]; - } - state->buf_used = (unsigned)in_len; - } -} - -void boring_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { - struct poly1305_state_st *state = poly1305_aligned_state(statep); - uint64_t f0, f1, f2, f3; - uint32_t g0, g1, g2, g3, g4; - uint32_t b, nb; - - if (state->buf_used) { - poly1305_update(state, state->buf, state->buf_used); - } - - b = state->h0 >> 26; - state->h0 = state->h0 & 0x3ffffff; - state->h1 += b; - b = state->h1 >> 26; - state->h1 = state->h1 & 0x3ffffff; - state->h2 += b; - b = state->h2 >> 26; - state->h2 = state->h2 & 0x3ffffff; - state->h3 += b; - b = state->h3 >> 26; - state->h3 = state->h3 & 0x3ffffff; - state->h4 += b; - b = state->h4 >> 26; - state->h4 = state->h4 & 0x3ffffff; - state->h0 += b * 5; - - g0 = state->h0 + 5; - b = g0 >> 26; - g0 &= 0x3ffffff; - g1 = state->h1 + b; - b = g1 >> 26; - g1 &= 0x3ffffff; - g2 = state->h2 + b; - b = g2 >> 26; - g2 &= 0x3ffffff; - g3 = state->h3 + b; - b = g3 >> 26; - g3 &= 0x3ffffff; - g4 = state->h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - state->h0 = (state->h0 & nb) | (g0 & b); - state->h1 = (state->h1 & nb) | (g1 & b); - state->h2 = (state->h2 & nb) | (g2 & b); - state->h3 = (state->h3 & nb) | (g3 & b); - state->h4 = (state->h4 & nb) | (g4 & b); - - f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); - f1 = ((state->h1 >> 6) | (state->h2 << 20)) + 
- (uint64_t)U8TO32_LE(&state->key[4]); - f2 = ((state->h2 >> 12) | (state->h3 << 14)) + - (uint64_t)U8TO32_LE(&state->key[8]); - f3 = ((state->h3 >> 18) | (state->h4 << 8)) + - (uint64_t)U8TO32_LE(&state->key[12]); - - U32TO8_LE(&mac[0], f0); - f1 += (f0 >> 32); - U32TO8_LE(&mac[4], f1); - f2 += (f1 >> 32); - U32TO8_LE(&mac[8], f2); - f3 += (f2 >> 32); - U32TO8_LE(&mac[12], f3); -} diff --git a/tests/rvv_bench/_include/thirdparty/boring.h b/tests/rvv_bench/_include/thirdparty/boring.h deleted file mode 100644 index 3fb2300b6..000000000 --- a/tests/rvv_bench/_include/thirdparty/boring.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2014, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ - -#include -#include - -void boring_chacha20(uint8_t *out, const uint8_t *in, - size_t in_len, const uint8_t key[32], - const uint8_t nonce[12], uint32_t counter); - -typedef uint8_t poly1305_state[512]; - -void boring_poly1305_init(poly1305_state *state, - const uint8_t key[32]); - -void boring_poly1305_update(poly1305_state *state, - const uint8_t *in, size_t in_len); - -void boring_poly1305_finish(poly1305_state *state, - uint8_t mac[16]); diff --git a/tests/rvv_bench/_include/thirdparty/rvv-rollback.S b/tests/rvv_bench/_include/thirdparty/rvv-rollback.S deleted file mode 100644 index e941604bb..000000000 --- a/tests/rvv_bench/_include/thirdparty/rvv-rollback.S +++ /dev/null @@ -1,255 +0,0 @@ -# rvv-rollback.S -- A minimal benchmarking library -# Olaf Bernstein -# Distributed under the MIT license, see license at the end of the file. -# New versions available at https://gist.github.com/camel-cdr/cfd9ba2b8754b521edf4892fe19c7031 -# Conversions taken from https://github.com/RISCVtestbed/rvv-rollback - -.macro vle32.v a:vararg - vlw.v \a -.endm -.macro vle16.v a:vararg - vlh.v \a -.endm -.macro vle8.v a:vararg - vlb.v \a -.endm -.macro vle32ff.v a:vararg - vlwff.v \a -.endm -.macro vle16ff.v a:vararg - vlhff.v \a -.endm -.macro vle8ff.v a:vararg - vlbff.v \a -.endm -.macro vse32.v a:vararg - vsw.v \a -.endm -.macro vse16.v a:vararg - vsh.v \a -.endm -.macro vse8.v a:vararg - vsb.v \a -.endm -.macro vluxei32.v a:vararg - vlxw.v \a -.endm -.macro vluxei16.v a:vararg - vlxh.v \a -.endm -.macro vluxei8.v a:vararg - vlxb.v \a -.endm -.macro vsuxei32.v a:vararg - vsuxw.v \a -.endm -.macro vsuxei16.v a:vararg - vsuxh.v \a -.endm -.macro vsuxei8.v a:vararg - vsuxb.v \a -.endm -.macro vlse32.v a:vararg - vlsw.v \a -.endm -.macro vlse16.v a:vararg - vlsh.v \a -.endm -.macro vlse8.v a:vararg - vlsb.v \a -.endm -.macro vsse32.v a:vararg - vssw.v \a -.endm -.macro vsse16.v a:vararg - vssh.v \a -.endm -.macro vsse8.v a:vararg - vssb.v \a -.endm -.macro vloxei32.v a:vararg - 
vlxw.v \a -.endm -.macro vloxei16.v a:vararg - vlxh.v \a -.endm -.macro vloxei8.v a:vararg - vlxb.v \a -.endm -.macro vsoxei32.v a:vararg - vsxw.v \a -.endm -.macro vsoxei16.v a:vararg - vsxh.v \a -.endm -.macro vsoxei8.v a:vararg - vsxb.v \a -.endm -.macro vfncvt.xu.f.w a:vararg - vfncvt.xu.f.v \a -.endm -.macro vfncvt.x.f.w a:vararg - vfncvt.x.f.v \a -.endm -.macro vfncvt.f.xu.w a:vararg - vfncvt.f.xu.v \a -.endm -.macro vfncvt.f.x.w a:vararg - vfncvt.f.x.v \a -.endm -.macro vfncvt.f.f.w a:vararg - vfncvt.f.f.v \a -.endm -.macro vfredusum a:vararg - vfredsum \a -.endm -.macro vfwredusum.vs a:vararg - vfwredsum.vs \a -.endm -.macro vnclip.wv a:vararg - vnclip.vv \a -.endm -.macro vnclip.wx a:vararg - vnclip.vx \a -.endm -.macro vnclip.wi a:vararg - vnclip.vi \a -.endm -.macro vnclipu.wv a:vararg - vnclipu.vv \a -.endm -.macro vnclipu.wx a:vararg - vnclipu.vx \a -.endm -.macro vnclipu.wi a:vararg - vnclipu.vi \a -.endm -.macro vnsra.wv a:vararg - vnsra.vv \a -.endm -.macro vnsra.wx a:vararg - vnsra.vx \a -.endm -.macro vnsra.wi a:vararg - vnsra.vi \a -.endm -.macro vnsrl.wv a:vararg - vnsrl.vv \a -.endm -.macro vnsrl.wx a:vararg - vnsrl.vx \a -.endm -.macro vnsrl.wi a:vararg - vnsrl.vi \a -.endm -.macro vmandn.mm a:vararg - vmandnot.mm \a -.endm -.macro vmorn.mm a:vararg - vmornot.mm \a -.endm -.macro vmmv.m a:vararg - vmcpy.m \a -.endm -.macro vcpop.m a:vararg - vmpopc.m \a -.endm -.macro vpop.m a:vararg - vmpopc.m \a -.endm -.macro vfirst.m a:vararg - vmfirst.m \a -.endm - -.macro define_for_all_nf prefix suffix prefix2 suffix2 - .macro \prefix\()2\suffix a:vararg - \prefix2\()2\suffix2 \a - .endm - .macro \prefix\()3\suffix a:vararg - \prefix2\()3\suffix2 \a - .endm - .macro \prefix\()4\suffix a:vararg - \prefix2\()4\suffix2 \a - .endm - .macro \prefix\()5\suffix a:vararg - \prefix2\()5\suffix2 \a - .endm - .macro \prefix\()6\suffix a:vararg - \prefix2\()6\suffix2 \a - .endm - .macro \prefix\()7\suffix a:vararg - \prefix2\()7\suffix2 \a - .endm - .macro 
\prefix\()8\suffix a:vararg - \prefix2\()8\suffix2 \a - .endm -.endm -define_for_all_nf vlseg e8.v vlseg b.v -define_for_all_nf vlseg e16.v vlseg h.v -define_for_all_nf vlseg e32.v vlseg w.v - -define_for_all_nf vsseg e8.v vsseg b.v -define_for_all_nf vsseg e16.v vsseg h.v -define_for_all_nf vsseg e32.v vsseg w.v - -define_for_all_nf vlsseg e8.v vlsseg bu.v -define_for_all_nf vlsseg e16.v vlsseg hu.v -define_for_all_nf vlsseg e32.v vlsseg wu.v - -define_for_all_nf vssseg e8.v vssseg b.v -define_for_all_nf vssseg e16.v vssseg h.v -define_for_all_nf vssseg e32.v vssseg w.v - -define_for_all_nf vloxseg e8.v vlxseg b.v -define_for_all_nf vloxseg e16.v vlxseg h.v -define_for_all_nf vloxseg e32.v vlxseg w.v -define_for_all_nf vluxseg e8.v vlxseg b.v -define_for_all_nf vluxseg e16.v vlxseg h.v -define_for_all_nf vluxseg e32.v vlxseg w.v - -define_for_all_nf vsoxseg e8.v vsxseg b.v -define_for_all_nf vsoxseg e16.v vsxseg h.v -define_for_all_nf vsoxseg e32.v vsxseg w.v -define_for_all_nf vsuxseg e8.v vsxseg b.v -define_for_all_nf vsuxseg e16.v vsxseg h.v -define_for_all_nf vsuxseg e32.v vsxseg w.v - - -.macro vsetvl0p7 rd, rs1, rs2, T=1, M=1 - vsetvl \rd, \rs1, \rs2 -.endm -.macro vsetvli0p7 rd, rs1, e=e8, m=m1, T=1, M=1 - .ifc \m, mf2 - NOT SUPPORTED IN rvv0.7 - .endif - .ifc \m, mf4 - NOT SUPPORTED IN rvv0.7 - .endif - .ifc \m, mf8 - NOT SUPPORTED IN rvv0.7 - .endif - vsetvli \rd, \rs1, \e, \m -.endm - -#define vsetvl vsetvl0p7 -#define vsetvli vsetvli0p7 - - - -# Copyright (c) 2023 Olaf Berstein -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and 
this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - diff --git a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S b/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S deleted file mode 100644 index b363d7830..000000000 --- a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.S +++ /dev/null @@ -1,68 +0,0 @@ -#ifdef MX - -#if MX_N == 4 || MX_N == 2 || MX_N == 1 - -.global MX(ascii_to_utf16_rvv_vsseg_) -.type MX(ascii_to_utf16_rvv_vsseg_), @function -MX(ascii_to_utf16_rvv_vsseg_): - vsetvli t0, x0, e8, MX2(), ta, ma - vmv.v.i v0, 0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - vsseg2e8.v v0, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - - - -.global MX(ascii_to_utf16_rvv_ext_) -.type MX(ascii_to_utf16_rvv_ext_), @function -MX(ascii_to_utf16_rvv_ext_): -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) -#if HAS_RVV_1_0 - vsetvli x0, x0, e16, MX2(), ta, ma - vzext.vf2 v8, v0 -#else - vwaddu.vx v8, v0, x0 - vsetvli x0, a2, e16, MX2(), ta, ma -#endif - vse16.v v8, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf16_rvv_vss_) -.type MX(ascii_to_utf16_rvv_vss_), @function -MX(ascii_to_utf16_rvv_vss_): - vsetvli t0, x0, e8, MX2(), ta, ma - vmv.v.i v0, 0 - li a3, 2 -1: - vsetvli t0, a2, e16, MX2(), ta, ma - vse16.v v0, (a0) - - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vsse8.v v8, (a0), a3 - - add a1, a1, t0 - sub 
a2, a2, t0 - slli t0, t0, 1 - add a0, a0, t0 - bnez a2, 1b - ret - -#endif -#endif - diff --git a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c b/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c deleted file mode 100644 index fc3fba747..000000000 --- a/tests/rvv_bench/ascii_to_utf16/ascii_to_utf16.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "bench.h" - -void -ascii_to_utf16_scalar(uint16_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++, BENCH_CLOBBER(); -} - -void -ascii_to_utf16_scalar_autovec(uint16_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++; -} - -#define IMPLS(f) \ - f(scalar) f(scalar_autovec) \ - f(rvv_ext_m1) f(rvv_ext_m2) f(rvv_ext_m4) \ - f(rvv_vsseg_m1) f(rvv_vsseg_m2) f(rvv_vsseg_m4) \ - f(rvv_vss_m1) f(rvv_vss_m2) f(rvv_vss_m4) \ - -typedef void Func(uint16_t *restrict dest, uint8_t const *restrict src, size_t len); - -#define DECLARE(f) extern Func ascii_to_utf16_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &ascii_to_utf16_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint16_t *dest; -uint8_t *src; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = (uint16_t*)mem + dOff/2; - src = (uint8_t*)(dest + 9 + MAX_MEM/3) + sOff; - memrand(src, n+9); - for (size_t i = 0; i < n+9; ++i) src[i] |= 0x7F; - memset(dest, 1, (n+9)*2); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/3 - 512-9*2, "ascii to utf16", bench_base }, - { MAX_MEM/3 - 512-9*2, "ascii to utf16 aligned", bench_aligned }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S b/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S deleted file mode 
100644 index 9cf21fad3..000000000 --- a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.S +++ /dev/null @@ -1,66 +0,0 @@ -#ifdef MX - -#if MX_N == 2 || MX_N == 1 - -.global MX(ascii_to_utf32_rvv_vsseg_) -MX(ascii_to_utf32_rvv_vsseg_): - vsetvli t0, x0, e8, MX4(), ta, ma - vmv.v.i v0, 0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - vsseg4e8.v v0, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf32_rvv_ext_) -MX(ascii_to_utf32_rvv_ext_): -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) -#if HAS_RVV_1_0 - vsetvli x0, x0, e32, MX4(), ta, ma - vzext.vf4 v8, v0 -#else - vwaddu.vx v16, v0, x0 - vsetvli x0, a2, e16, MX2(), ta, ma - vwaddu.vx v8, v16, x0 - vsetvli x0, a2, e32, MX4(), ta, ma -#endif - vse32.v v8, (a0) - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - - -.global MX(ascii_to_utf32_rvv_vss_) -MX(ascii_to_utf32_rvv_vss_): - vsetvli t0, x0, e8, MX4(), ta, ma - vmv.v.i v0, 0 - li a3, 4 -1: - vsetvli t0, a2, e32, MX4(), ta, ma - vse32.v v0, (a0) - - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vsse8.v v8, (a0), a3 - - add a1, a1, t0 - sub a2, a2, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a2, 1b - ret - -#endif -#endif - diff --git a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c b/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c deleted file mode 100644 index 968493037..000000000 --- a/tests/rvv_bench/ascii_to_utf32/ascii_to_utf32.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "bench.h" - -void -ascii_to_utf32_scalar(uint32_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++, BENCH_CLOBBER(); -} - -void -ascii_to_utf32_scalar_autovec(uint32_t *restrict dest, uint8_t const *restrict src, size_t len) -{ - while (len--) *dest++ = *src++; -} - -#define IMPLS(f) \ - f(scalar) f(scalar_autovec) \ - f(rvv_ext_m1) f(rvv_ext_m2) \ - f(rvv_vsseg_m1) f(rvv_vsseg_m2) \ - f(rvv_vss_m1) f(rvv_vss_m2) \ 
- -typedef void Func(uint32_t *restrict dest, uint8_t const *restrict src, size_t len); - -#define DECLARE(f) extern Func ascii_to_utf32_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &ascii_to_utf32_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *dest; -uint8_t *src; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = (uint32_t*)mem + dOff/4; - src = (uint8_t*)(dest + 9 + MAX_MEM/5) + sOff; - memrand(src, n+9); - for (size_t i = 0; i < n+9; ++i) src[i] |= 0x7F; - memset(dest, 1, (n+9)*4); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/5 - 512-9*2, "ascii to utf32", bench_base }, - { MAX_MEM/5 - 512-9*2, "ascii to utf32 aligned", bench_aligned }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/byteswap/byteswap.S b/tests/rvv_bench/byteswap/byteswap.S deleted file mode 100644 index 79154ef68..000000000 --- a/tests/rvv_bench/byteswap/byteswap.S +++ /dev/null @@ -1,81 +0,0 @@ -/* - * TODO: This currently only works for VLEN<=256. - * I think rvv 1.0 should only vrgatherei16.vv here in the future. 
- */ - -#ifdef MX - - -# a0 = ptr, a1 = len -.global MX(byteswap32_rvv_gather_) -MX(byteswap32_rvv_gather_): - vsetvli t0, x0, e8, MX(), ta, ma - vid.v v0 - vand.vi v8, v0, 3 - vrsub.vi v8, v8, 3 - vsrl.vi v0, v0, 2 - vsll.vi v0, v0, 2 - vadd.vv v0, v0, v8 # i/8*8 + (7-1%8) -1: - vsetvli t0, a1, e32, MX(), ta, ma - vle32.v v8, (a0) - slli t1, t0, 2 - vsetvli x0, t1, e8, MX(), ta, ma - vrgather.vv v16, v8, v0 - vsetvli x0, t0, e32, MX(), ta, ma - vse32.v v16, (a0) - sub a1, a1, t0 - add a0, a0, t1 - bnez a1, 1b - ret -#endif - -#if MX_N == 2 - -.macro byteswap32_rvv_m1_gathers n - .global byteswap32_rvv_m1_gathers_m\n - byteswap32_rvv_m1_gathers_m\n: - vsetvli t0, x0, e8, m1, ta, ma - vid.v v0 - vand.vi v8, v0, 3 - vrsub.vi v8, v8, 3 - vsrl.vi v0, v0, 2 - vsll.vi v0, v0, 2 - vadd.vv v0, v0, v8 # i/8*8 + (7-1%8) - 1: - vsetvli t0, a1, e32, m\n, ta, ma - vle32.v v8, (a0) - vsetvli t1, x0, e8, m1, ta, ma - vrgather.vv v16, v8, v0 - .ifge \n-2 - vrgather.vv v17, v9, v0 - .ifge \n-4 - vrgather.vv v18, v10, v0 - vrgather.vv v19, v11, v0 - .ifge \n-8 - vrgather.vv v20, v12, v0 - vrgather.vv v21, v13, v0 - vrgather.vv v22, v14, v0 - vrgather.vv v23, v15, v0 - .endif - .endif - .endif - vsetvli x0, t0, e32, m\n, ta, ma - vse32.v v16, (a0) - sub a1, a1, t0 - slli t0, t0, 2 - add a0, a0, t0 - bnez a1, 1b - ret -.endm - -byteswap32_rvv_m1_gathers 2 -#endif -#if MX_N == 4 -byteswap32_rvv_m1_gathers 4 -#endif -#if MX_N == 8 -byteswap32_rvv_m1_gathers 8 -#endif - - diff --git a/tests/rvv_bench/byteswap/byteswap.c b/tests/rvv_bench/byteswap/byteswap.c deleted file mode 100644 index dff204b72..000000000 --- a/tests/rvv_bench/byteswap/byteswap.c +++ /dev/null @@ -1,79 +0,0 @@ -#include "bench.h" - -void -byteswap32_scalar(uint32_t *ptr, size_t n) -{ - for (uint8_t *p = (uint8_t*)ptr; n--; p += 4) { - uint8_t p0 = p[0], p1 = p[1], p2 = p[2], p3 = p[3]; - p[3] = p0; BENCH_CLOBBER(); - p[2] = p1; BENCH_CLOBBER(); - p[1] = p2; BENCH_CLOBBER(); - p[0] = p3; BENCH_CLOBBER(); - } -} - -void 
-byteswap32_scalar_autovec(uint32_t *ptr, size_t n) -{ - for (uint8_t *p = (uint8_t*)ptr; n--; p += 4) { - uint8_t p0 = p[0], p1 = p[1], p2 = p[2], p3 = p[3]; - p[3] = p0; - p[2] = p1; - p[1] = p2; - p[0] = p3; - } -} - -#if __riscv_zbb -void -byteswap32_SWAR_rev8(uint32_t *ptr, size_t n) -{ - while (n--) { - *ptr = __builtin_bswap32(*ptr); - ++ptr; - BENCH_CLOBBER(); - } -} -#define REV8(f) f(SWAR_rev8) -#else -#define REV8(f) -#endif - - -#define IMPLS(f) \ - f(scalar) \ - f(scalar_autovec) \ - REV8(f) \ - MX(f, rvv_gather) \ - f(rvv_m1_gathers_m2) \ - f(rvv_m1_gathers_m4) \ - f(rvv_m1_gathers_m8) \ - -typedef void Func(uint32_t *ptr, size_t n); - -#define DECLARE(f) extern Func byteswap32_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &byteswap32_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *ptr; - -void init(void) { ptr = (uint32_t*)mem; } - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n; ++i) - sum = uhash(sum) + ptr[i]; - return sum; -} - -BENCH(base) { - memrand(ptr, n * sizeof *ptr); - TIME f(ptr, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/4, "byteswap32", bench_base } -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/chacha20/chacha20.S b/tests/rvv_bench/chacha20/chacha20.S deleted file mode 100644 index 9c62caeba..000000000 --- a/tests/rvv_bench/chacha20/chacha20.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef MX -#if __riscv_xlen >= 64 -#include "rvv-chacha-poly/vchacha.s" -#endif -#endif diff --git a/tests/rvv_bench/chacha20/chacha20.c b/tests/rvv_bench/chacha20/chacha20.c deleted file mode 100644 index 7d6328b54..000000000 --- a/tests/rvv_bench/chacha20/chacha20.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "bench.h" -#if __riscv_xlen >= 64 -#include "../thirdparty/boring.h" - -uint8_t *dest, *src; -uint8_t key[32], nonce[12]; -uint32_t counter; - - -extern void vector_chacha20( - uint8_t *out, const uint8_t *in, - size_t in_len, const uint8_t key[32], - const uint8_t nonce[12], uint32_t counter); - -static 
void -chacha20_boring(void *restrict dest, void const *restrict src, size_t n) { - boring_chacha20(dest, src, n, key, nonce, counter); -} - -static void -chacha20_rvv(void *restrict dest, void const *restrict src, size_t n) { - vector_chacha20(dest, src, n, key, nonce, counter); -} - -typedef void *Func(void *restrict dest, void const *restrict src, size_t n); - -Impl impls[] = { - { "boring", &chacha20_boring }, - { "rvv", &chacha20_rvv }, -}; - -void init(void) { - memrand(key, sizeof key); - memrand(nonce, sizeof nonce); - counter = 0; -} - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < n+16; ++i) - sum = uhash(sum) + mem[i]; - return sum; -} - -BENCH(aligned) { - memset(mem, 0, n+16); - TIME f(mem, mem + MAX_MEM/2 + 16, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/2 - 16, "chacha20 aligned", bench_aligned } -}; BENCH_MAIN(impls, benches) - - -#include "../thirdparty/boring.c" -#else -void init(void) {} -Impl impls[] = {}; -Bench benches[] = {}; -BENCH_MAIN(impls, benches) -#endif diff --git a/tests/rvv_bench/default.nix b/tests/rvv_bench/default.nix index 5a5c08121..5d8f88017 100644 --- a/tests/rvv_bench/default.nix +++ b/tests/rvv_bench/default.nix @@ -1,40 +1,67 @@ { lib -, getTestRequiredFeatures +, fetchFromGitHub , linkerScript , makeBuilder -, findAndBuild , t1main -, makeEmuResult +, filterByFeatures }: let - include = ./_include; + src = fetchFromGitHub { + owner = "camel-cdr"; + repo = "rvv-bench"; + rev = "5dc20c3596b3aa8412804e2d169d1b175bae927a"; + hash = "sha256-5A079sl4g7FIWgCYykLgTZXrmyfIblyXtxeh1AwqKiU="; + fetchSubmodules = true; + }; + + nonFpCases = [ + "ascii_to_utf16" + "ascii_to_utf32" + "byteswap" + "chacha20" + "memcpy" + "memset" + "mergelines" + "poly1305" + "strlen" + "utf8_count" + ]; + + fpCases = [ + "mandelbrot" + ]; + + cases = nonFpCases ++ fpCases; + builder = makeBuilder { casePrefix = "rvv_bench"; }; - build = { caseName, sourcePath }: + build = caseName: let drv = builder { - inherit caseName; + 
inherit caseName src; - src = sourcePath; + patches = [ ./t1_runtime.patch ]; - featuresRequired = getTestRequiredFeatures sourcePath; + featuresRequired = lib.optionals (lib.elem caseName fpCases) [ "zve32f" ]; buildPhase = '' runHook preBuild + pushd bench >/dev/null - $CC -E -DINC=$PWD/${caseName}.S -E ${include}/template.S -o functions.S - $CC -I${include} ${caseName}.c -T${linkerScript} ${t1main} functions.S -o $pname.elf + $CC -E -DINC=$PWD/${caseName}.S template.S -E -o functions.S + $CC ${caseName}.c -T${linkerScript} ${t1main} functions.S -o ../$pname.elf + popd >/dev/null runHook postBuild ''; - meta.description = "test case '${caseName}', written in C intrinsic"; - - passthru.emu-result = makeEmuResult drv; + meta.description = "test case '${caseName}' from rvv-bench"; }; in drv; in -findAndBuild ./. build +lib.filterAttrs + filterByFeatures + (lib.genAttrs cases build) diff --git a/tests/rvv_bench/mandelbrot/mandelbrot.S b/tests/rvv_bench/mandelbrot/mandelbrot.S deleted file mode 100644 index 55224666a..000000000 --- a/tests/rvv_bench/mandelbrot/mandelbrot.S +++ /dev/null @@ -1,358 +0,0 @@ -#if 0 - -void -mandelbrot_rvv(size_t width, size_t maxIter, uint32_t *res) -{ - vfloat32m2_t cx, cy, zx, zy, zx2, zy2; - vuint32m2_t viter; - vbool16_t mask; - - for (size_t y = 0; y < width; ++y) { - size_t vl, x = width; - while (x > 0) { - x -= vl = __riscv_vsetvl_e32m2(x); - - mask = __riscv_vmset_m_b16(vl); - viter = __riscv_vmv_v_x_u32m2(0, vl); - - cx = __riscv_vfcvt_f_xu_v_f32m2(__riscv_vadd_vx_u32m2(__riscv_viota_m_u32m2(mask, vl), x, vl), vl); - cy = __riscv_vfmv_v_f_f32m2(y, vl); - - cx = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vf_f32m2(cx, 2.0f / width, vl), -1.5f, vl); - cy = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vf_f32m2(cy, 2.0f / width, vl), -1, vl); - - zx = zy = zx2 = zy2 = __riscv_vfmv_v_f_f32m2(0, vl); - - size_t iter = 0; - while (iter < maxIter && __riscv_vfirst_m_b16(mask, vl) >= 0) { - mask = __riscv_vmflt_vf_f32m2_b16(__riscv_vfadd_vv_f32m2(zx2, 
zy2, vl), 4, vl); - zx2 = __riscv_vfadd_vv_f32m2(__riscv_vfsub_vv_f32m2(zx2, zy2, vl), cx, vl); - zy = __riscv_vfmacc_vv_f32m2(cy, __riscv_vfadd_vv_f32m2(zx, zx, vl), zy, vl); - zx = zx2; - zx2 = __riscv_vfmul_vv_f32m2(zx, zx, vl); - zy2 = __riscv_vfmul_vv_f32m2(zy, zy, vl); - ++iter; - viter = __riscv_vmerge_vxm_u32m2(viter, iter, mask, vl); - } - __riscv_vse32_v_u32m2(res + x, viter, vl); - } - res += width; - } -} - -#endif - -#if MX_N > 0 && MX_N <= 2 - -#if HAS_F16 -.global MX(mandelbrot_rvv_f16_) # generated by clang -MX(rvv_f16_m1p5): - .half 0xbe00 # half -1.5 -MX(rvv_f16_m1): - .half 0xbc00 # half -1 -MX(rvv_f16_p4): - .half 0x4400 # half 4 -MX(mandelbrot_rvv_f16_): - beqz a0, MX(rvv_f16_13) - beqz a1, MX(rvv_f16_9) - li a7, 0 - fcvt.s.wu fa2, a0 - lui a3, 262144 - fmv.w.x fa1, a3 - la a3, MX(rvv_f16_m1p5) - flh fa5, (a3) - la a3, MX(rvv_f16_m1) - flh fa4, (a3) - la a3, MX(rvv_f16_p4) - flh fa3, (a3) - fdiv.s fa2, fa1, fa2 - fcvt.h.s fa2, fa2 - slli a6, a0, 2 - j MX(rvv_f16_4) -MX(rvv_f16_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f16_13) -MX(rvv_f16_4): - fcvt.s.wu fa1, a7 - fcvt.h.s fa1, fa1 - mv t0, a0 - j MX(rvv_f16_6) -MX(rvv_f16_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, MX2(), ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f16_3) -MX(rvv_f16_6): - vsetvli a3, t0, e32, MX2(), ta, ma - sub t0, t0, a3 - vmset.m v0 - vmv.v.i v8, 0 - vsetvli zero, zero, e16, MX(), ta, ma - viota.m v12, v0 - vadd.vx v12, v12, t0 - vfcvt.f.xu.v v12, v12 - vfmv.v.f v14, fa1 - vfmul.vf v12, v12, fa2 - vfadd.vf v12, v12, fa5 - vfmul.vf v14, v14, fa2 - vfadd.vf v14, v14, fa4 - vmv.v.i v20, 0 - li a4, 1 - mv a3, a1 - vmv.v.i v16, 0 - vmv.v.i v18, 0 - vmv.v.i v22, 0 -MX(rvv_f16_7): -#if HAS_RVV_1_0 || MX_N >= 2 - vsetvli zero, zero, e8, MXf2(), ta, ma -#else - vsetvli zero, zero, e8, m1, ta, ma -#endif - vfirst.m a5, v0 - bltz a5, MX(rvv_f16_5) - vsetvli zero, zero, e16, MX(), ta, ma - vfadd.vv v24, v18, v22 - vmflt.vf v0, v24, fa3 - vfsub.vv 
v18, v18, v22 - vfadd.vv v20, v20, v20 - vfadd.vv v24, v18, v12 - vfmadd.vv v16, v20, v14 - vfmul.vv v18, v24, v24 - vfmul.vv v22, v16, v16 - vsetvli zero, zero, e32, MX2(), ta, ma - vmerge.vxm v8, v8, a4, v0 - addi a3, a3, -1 - addi a4, a4, 1 -#if HAS_RVV_1_0 - vmv2r.v v20, v24 -#else - vsetvli zero, zero, e32, m2 - vmv.v.v v20, v24 -#endif - bnez a3, MX(rvv_f16_7) - j MX(rvv_f16_5) -MX(rvv_f16_9): - slli a3, a0, 2 -MX(rvv_f16_10): - mv a4, a0 -MX(rvv_f16_11): - vsetvli a5, a4, e32, MX2(), ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f16_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f16_10) -MX(rvv_f16_13): - ret -#endif - - -.global MX(mandelbrot_rvv_f32_) # generated by clang -MX(mandelbrot_rvv_f32_): - beqz a0, MX(rvv_f32_13) - beqz a1, MX(rvv_f32_9) - li a7, 0 - fcvt.s.wu fa5, a0 - lui a3, 262144 - fmv.w.x fa4, a3 - fdiv.s fa5, fa4, fa5 - lui a3, 785408 - fmv.w.x fa4, a3 - lui a3, 784384 - fmv.w.x fa3, a3 - lui a3, 264192 - fmv.w.x fa2, a3 - slli a6, a0, 2 - j MX(rvv_f32_4) -MX(rvv_f32_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f32_13) -MX(rvv_f32_4): - fcvt.s.wu fa1, a7 - mv t0, a0 - j MX(rvv_f32_6) -MX(rvv_f32_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, MX(), ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f32_3) -MX(rvv_f32_6): - vsetvli t1, t0, e32, MX(), ta, ma - sub t0, t0, t1 - vmset.m v0 - vmv.v.i v8, 0 - viota.m v10, v0 - vadd.vx v10, v10, t0 - vfcvt.f.xu.v v10, v10 - vfmv.v.f v12, fa1 - vfmul.vf v10, v10, fa5 - vfadd.vf v10, v10, fa4 - vfmul.vf v12, v12, fa5 - vfadd.vf v12, v12, fa3 - vmv.v.i v18, 0 - li a3, 1 - mv a5, a1 - vmv.v.i v14, 0 - vmv.v.i v16, 0 - vmv.v.i v20, 0 -MX(rvv_f32_7): -#if HAS_RVV_1_0 - vsetvli zero, t1, e8, MXf4(), ta, ma -#else - vsetvli zero, t1, e8, m1, ta, ma -#endif - vfirst.m a4, v0 - bltz a4, MX(rvv_f32_5) - vsetvli zero, zero, e32, MX(), ta, ma - vfadd.vv v22, v16, v20 - vmflt.vf v0, v22, fa2 - vfsub.vv v16, 
v16, v20 - vfadd.vv v18, v18, v18 - vfadd.vv v22, v16, v10 - vfmadd.vv v14, v18, v12 - vfmul.vv v16, v22, v22 - vfmul.vv v20, v14, v14 - vmerge.vxm v8, v8, a3, v0 - addi a5, a5, -1 - addi a3, a3, 1 - vmv.v.v v18, v22 - bnez a5, MX(rvv_f32_7) - j MX(rvv_f32_5) -MX(rvv_f32_9): - slli a3, a0, 2 -MX(rvv_f32_10): - mv a4, a0 -MX(rvv_f32_11): - vsetvli a5, a4, e32, MX(), ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f32_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f32_10) -MX(rvv_f32_13): - ret - -#endif - -#if MX_N == 2 && HAS_E64 - -.global MX(mandelbrot_rvv_f64_) # generated by clang -MX(rvv_f64_m1p5): - .quad 0xbff8000000000000 # double -1.5 -MX(rvv_f64_m1): - .quad 0xbff0000000000000 # double -1 -MX(rvv_f64_p4): - .quad 0x4010000000000000 # double 4 -MX(mandelbrot_rvv_f64_): - beqz a0, MX(rvv_f64_13) - beqz a1, MX(rvv_f64_9) - li a7, 0 - fcvt.s.wu fa2, a0 - lui a3, 262144 - fmv.w.x fa1, a3 - la a3, MX(rvv_f64_m1p5) - fld fa5, (a3) - la a3, MX(rvv_f64_m1) - fld fa4, (a3) - la a3, MX(rvv_f64_p4) - fld fa3, (a3) - fdiv.s fa2, fa1, fa2 - fcvt.d.s fa2, fa2 - slli a6, a0, 2 - j MX(rvv_f64_4) -MX(rvv_f64_3): - addi a7, a7, 1 - add a2, a2, a6 - beq a7, a0, MX(rvv_f64_13) -MX(rvv_f64_4): - fcvt.d.wu fa1, a7 - mv t0, a0 - j MX(rvv_f64_6) -MX(rvv_f64_5): - slli a3, t0, 2 - add a3, a3, a2 - vsetvli zero, zero, e32, m1, ta, ma - vse32.v v8, (a3) - beqz t0, MX(rvv_f64_3) -MX(rvv_f64_6): - vsetvli a3, t0, e32, m1, ta, ma - sub t0, t0, a3 - vmset.m v0 - vmv.v.i v8, 0 - vsetvli zero, zero, e64, m2, ta, ma - viota.m v10, v0 - vadd.vx v10, v10, t0 - vfcvt.f.xu.v v10, v10 - vfmv.v.f v12, fa1 - vfmul.vf v10, v10, fa2 - vfadd.vf v10, v10, fa5 - vfmul.vf v12, v12, fa2 - vfadd.vf v12, v12, fa4 - vmv.v.i v18, 0 - li a4, 1 - mv a3, a1 - vmv.v.i v14, 0 - vmv.v.i v16, 0 - vmv.v.i v20, 0 -MX(rvv_f64_7): -#if HAS_RVV_1_0 - vsetvli zero, zero, e8, MXf8(), ta, ma -#else - vsetvli zero, t1, e8, m1, ta, ma -#endif - 
vfirst.m a5, v0 - bltz a5, MX(rvv_f64_5) - vsetvli zero, zero, e64, m2, ta, ma - vfadd.vv v22, v16, v20 - vmflt.vf v0, v22, fa3 - vfsub.vv v16, v16, v20 - vfadd.vv v18, v18, v18 - vfadd.vv v22, v16, v10 - vfmadd.vv v14, v18, v12 - vfmul.vv v16, v22, v22 - vfmul.vv v20, v14, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vxm v8, v8, a4, v0 - addi a3, a3, -1 - addi a4, a4, 1 -#if HAS_RVV_1_0 - vmv2r.v v18, v22 -#else - vsetvli zero, zero, e32, m2 - vmv.v.v v18, v22 -#endif - bnez a3, MX(rvv_f64_7) - j MX(rvv_f64_5) -MX(rvv_f64_9): - slli a3, a0, 2 -MX(rvv_f64_10): - mv a4, a0 -MX(rvv_f64_11): - vsetvli a5, a4, e32, m1, ta, ma - sub a4, a4, a5 - vmv.v.i v8, 0 - slli a5, a4, 2 - add a5, a5, a2 - vse32.v v8, (a5) - bnez a4, MX(rvv_f64_11) - addi a1, a1, 1 - add a2, a2, a3 - bne a1, a0, MX(rvv_f64_10) -MX(rvv_f64_13): - ret - -#endif - - diff --git a/tests/rvv_bench/mandelbrot/mandelbrot.c b/tests/rvv_bench/mandelbrot/mandelbrot.c deleted file mode 100644 index f182eba0f..000000000 --- a/tests/rvv_bench/mandelbrot/mandelbrot.c +++ /dev/null @@ -1,94 +0,0 @@ -#include "bench.h" - -void -mandelbrot_scalar_f32(size_t width, size_t maxIter, uint32_t *res) -{ - for (size_t y = 0; y < width; ++y) - for (size_t x = 0; x < width; ++x) { - float cx = x * 2.0f / width - 1.5; - float cy = y * 2.0f / width - 1; - size_t iter = 0; - float zx = 0, zy = 0, zxS = 0, zyS = 0; - - BENCH_VOLATILE_REG(cy); - while (zxS + zyS <= 4 && iter < maxIter) { - zxS = zxS - zyS + cx; - zy = 2 * zx * zy + cy; - zx = zxS; - BENCH_VOLATILE_REG(zx); - zxS = zx*zx; - zyS = zy*zy; - ++iter; - BENCH_CLOBBER(); - } - *res++ = iter; - } -} - -#if __riscv_xlen >= 64 -void -mandelbrot_scalar_f64(size_t width, size_t maxIter, uint32_t *res) -{ - for (size_t y = 0; y < width; ++y) - for (size_t x = 0; x < width; ++x) { - double cx = x * 2.0 / width - 1.5; - double cy = y * 2.0 / width - 1; - size_t iter = 0; - double zx = 0, zy = 0, zxS = 0, zyS = 0; - - BENCH_VOLATILE_REG(cy); - while (zxS + zyS <= 4 && iter 
< maxIter) { - zxS = zxS - zyS + cx; - zy = 2 * zx * zy + cy; - zx = zxS; - BENCH_VOLATILE_REG(zx); - zxS = zx*zx; - zyS = zy*zy; - ++iter; - } - *res++ = iter; - } -} -#endif - -#if HAS_F16 -# define IMPLS_F16(f) f(rvv_f16_m1) f(rvv_f16_m2) -#else -# define IMPLS_F16(f) -#endif - -#define IMPLS(f) \ - f(rvv_f32_m1) \ - f(scalar_f32) \ - IF64(f(scalar_f64)) \ - IMPLS_F16(f) \ - f(rvv_f32_m2) \ - IF64(f(rvv_f64_m2)) \ - -typedef void Func(size_t width, size_t maxIter, uint32_t *res); - -#define DECLARE(f) extern Func mandelbrot_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &mandelbrot_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint32_t *dest; -void init(void) { memset(mem, 0, MAX_MEM); dest = (uint32_t*)mem; } - -/* disabled, because of rounding errors, please independently verify */ -ux checksum(size_t n) { return 0; } - -BENCH(base) { - n = usqrt(n); - TIME f(n, mandelbrot_ITER, dest); -} BENCH_END - -Bench benches[] = { - { - SCALE_mandelbrot(MAX_MEM / 4), - "mandelbrot "STR(mandelbrot_ITER), - bench_base - }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/memcpy/memcpy.S b/tests/rvv_bench/memcpy/memcpy.S deleted file mode 100644 index 6511a0493..000000000 --- a/tests/rvv_bench/memcpy/memcpy.S +++ /dev/null @@ -1,153 +0,0 @@ -#if 0 -void *memcpy_rvv(void *restrict dest, void const *restrict src, size_t n) { - unsigned char *d = dest; - unsigned char const *s = src; - for (size_t vl; n > 0; n -= vl, s += vl, d += vl) { - vl = __riscv_vsetvl_e8m8(n); - vuint8m8_t vec_src = __riscv_vle8_v_u8m8(s, vl); - __riscv_vse8_v_u8m8(d, vec_src, vl); - } - return dest; -} -#endif - - -#ifdef MX - -# a0 = dest, a1 = src, a2 = len -.global MX(memcpy_rvv_) -MX(memcpy_rvv_): - mv a3, a0 -1: - vsetvli t0, a2, e8, MX(), ta, ma - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 - bnez a2, 1b - ret - -.global MX(memcpy_rvv_align_dest_) -MX(memcpy_rvv_align_dest_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli 
t0, zero, e8, m1, ta, ma # vlenb -#endif - bltu a2, t0, 2f # len < vlenb - # align dest to vlenb - sub t1, zero, a0 - addi t2, t0, -1 - and t1, t1, t2 #align = (-dest) & (vlenb-1) - vsetvli t0, t1, e8, MX(), ta, ma -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -.global MX(memcpy_rvv_align_src_) -MX(memcpy_rvv_align_src_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli t0, zero, e8, m1, ta, ma # vlen -#endif - bltu a2, t0, 2f # len < vlen - # align src to vlen - sub t1, zero, a1 - addi t2, t0, -1 - and t1, t1, t2 # align = (-src) & (vlen-1) - vsetvli t0, t1, e8, MX(), ta, ma -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -# combination of memcpy_rvv_align_dest and memcpy_rvv -.global MX(memcpy_rvv_align_dest_hybrid_) -MX(memcpy_rvv_align_dest_hybrid_): - mv a3, a0 -#if HAS_RVV_1_0 - csrr t0, vlenb -#else - vsetvli t0, zero, e8, m1, ta, ma # vlen -#endif - slli t1, t0, 8 # skip costly division for more values - bltu a2, t1, 2f # len < vlen - sub t1, zero, a0 - addi t2, t0, -1 - and t1, t1, t2 # align = (-dest) & (vlen-1) - vsetvli t0, t1, e8, MX(), ta, ma # align dest to vlen -1: - vle8.v v0, (a1) - add a1, a1, t0 - sub a2, a2, t0 - vse8.v v0, (a3) - add a3, a3, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - - -.global MX(memcpy_rvv_tail_) -MX(memcpy_rvv_tail_): - vsetvli t0, a2, e8, MX(), ta, ma - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vle8.v v8, (a1) - add a1, a1, t0 # src += vlenb - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # copy tail - vsetvli zero, a3, e8, MX(), ta, ma - vle8.v v8, (a1) - vse8.v v8, (a2) - ret - -# this is supposed to test how well the implementation handles -# operations with an 
vl smaller than VLMAX -.global MX(memcpy_rvv_128_) -MX(memcpy_rvv_128_): - li t0, 128/8 - bgt a2, t0, 1f - mv t0, a2 -1: - vsetvli t0, t0, e8, MX(), ta, ma - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vle8.v v8, (a1) - add a1, a1, t0 # src += vlenb - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # copy tail - vsetvli zero, a3, e8, MX(), ta, ma - vle8.v v8, (a1) - vse8.v v8, (a2) - ret - -#endif - diff --git a/tests/rvv_bench/memcpy/memcpy.c b/tests/rvv_bench/memcpy/memcpy.c deleted file mode 100644 index 60a977c71..000000000 --- a/tests/rvv_bench/memcpy/memcpy.c +++ /dev/null @@ -1,197 +0,0 @@ -#include "bench.h" - -void * -memcpy_scalar(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - while (n--) *d++ = *s++, BENCH_CLOBBER(); - return dest; -} - -void * -memcpy_scalar_autovec(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - while (n--) *d++ = *s++; - return dest; -} - -/* https://git.musl-libc.org/cgit/musl/tree/src/string/memcpy.c */ -void * -memcpy_musl(void *restrict dest, void const *restrict src, size_t n) -{ - unsigned char *d = dest; - unsigned char const *s = src; - -#ifdef __GNUC__ - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define LS >> -#define RS << -#else -#define LS << -#define RS >> -#endif - - typedef uint32_t __attribute__((__may_alias__)) u32; - uint32_t w, x; - - for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++; - - if ((uintptr_t)d % 4 == 0) { - for (; n>=16; s+=16, d+=16, n-=16) { - *(u32 *)(d+0) = *(u32 *)(s+0); - *(u32 *)(d+4) = *(u32 *)(s+4); - *(u32 *)(d+8) = *(u32 *)(s+8); - *(u32 *)(d+12) = *(u32 *)(s+12); - } - if (n&8) { - *(u32 *)(d+0) = *(u32 *)(s+0); - *(u32 *)(d+4) = *(u32 *)(s+4); - d += 8; s += 8; - } - if (n&4) { - *(u32 *)(d+0) = *(u32 *)(s+0); - d += 4; s += 4; - } - if 
(n&2) { - *d++ = *s++; *d++ = *s++; - } - if (n&1) { - *d = *s; - } - return dest; - } - - if (n >= 32) switch ((uintptr_t)d % 4) { - case 1: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - n -= 3; - for (; n>=17; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+1); - *(u32 *)(d+0) = (w LS 24) | (x RS 8); - w = *(u32 *)(s+5); - *(u32 *)(d+4) = (x LS 24) | (w RS 8); - x = *(u32 *)(s+9); - *(u32 *)(d+8) = (w LS 24) | (x RS 8); - w = *(u32 *)(s+13); - *(u32 *)(d+12) = (x LS 24) | (w RS 8); - } - break; - case 2: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - n -= 2; - for (; n>=18; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+2); - *(u32 *)(d+0) = (w LS 16) | (x RS 16); - w = *(u32 *)(s+6); - *(u32 *)(d+4) = (x LS 16) | (w RS 16); - x = *(u32 *)(s+10); - *(u32 *)(d+8) = (w LS 16) | (x RS 16); - w = *(u32 *)(s+14); - *(u32 *)(d+12) = (x LS 16) | (w RS 16); - } - break; - case 3: - w = *(u32 *)s; - *d++ = *s++; - n -= 1; - for (; n>=19; s+=16, d+=16, n-=16) { - x = *(u32 *)(s+3); - *(u32 *)(d+0) = (w LS 8) | (x RS 24); - w = *(u32 *)(s+7); - *(u32 *)(d+4) = (x LS 8) | (w RS 24); - x = *(u32 *)(s+11); - *(u32 *)(d+8) = (w LS 8) | (x RS 24); - w = *(u32 *)(s+15); - *(u32 *)(d+12) = (x LS 8) | (w RS 24); - } - break; - } - if (n&16) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&8) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&4) { - *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++; - } - if (n&2) { - *d++ = *s++; *d++ = *s++; - } - if (n&1) { - *d = *s; - } - return dest; -#endif - - while (n--) { *d++ = *s++; BENCH_CLOBBER(); } - return dest; -} - -#define memcpy_libc memcpy - -#define IMPLS(f) \ - IFHOSTED(f(libc)) \ - f(musl) \ - f(scalar) \ - f(scalar_autovec) \ - MX(f, rvv) \ - MX(f, rvv_align_dest) \ - 
MX(f, rvv_align_src) \ - MX(f, rvv_align_dest_hybrid) \ - MX(f, rvv_tail) \ - MX(f, rvv_128) \ - -typedef void *Func(void *restrict dest, void const *restrict src, size_t n); - -#define DECLARE(f) extern Func memcpy_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &memcpy_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint8_t *dest, *src; -ux last; - -void init(void) { } - -ux checksum(size_t n) { - ux sum = last; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t dOff, size_t sOff) { - dest = mem + dOff; src = dest + MAX_MEM/2 + sOff + 9; - memset(dest, 0, n+9); -} - -BENCH(base) { - common(n, urand() & 255, urand() & 255); - TIME last = (uintptr_t)f(dest, src, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0, 0); - TIME last = (uintptr_t)f(dest, src, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM/2 - 521, "memcpy", bench_base }, - { MAX_MEM/2 - 521, "memcpy aligned", bench_aligned} -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/memset/memset.S b/tests/rvv_bench/memset/memset.S deleted file mode 100644 index 3d00eae62..000000000 --- a/tests/rvv_bench/memset/memset.S +++ /dev/null @@ -1,96 +0,0 @@ -#if 0 -void *memset(void *dst, int n, size_t len) { - unsigned char *d = dst; - vuint8m8_t v = __riscv_vmv_v_x_u8m8((uint8_t)n, __riscv_vsetvlmax_e8m8()); - for (size_t vl; len > 0; len -= vl, d += vl) { - vl = __riscv_vsetvl_e8m8(len); - __riscv_vse8_v_u8m8(d, v, vl); - } - return dst; -} -#endif - -#ifdef MX - -.global MX(memset_rvv_) -MX(memset_rvv_): - vsetvli a3, zero, e8, MX(), ta, ma - vmv.v.x v8, a1 - mv a1, a0 -1: - vsetvli a3, a2, e8, MX(), ta, ma - vse8.v v8, (a1) - sub a2, a2, a3 - add a1, a1, a3 - bnez a2, 1b - ret - - -.global MX(memset_rvv_align_) -MX(memset_rvv_align_): - vsetvli t0, zero, e8, m1, ta, ma # vlen - vmv.v.x v8, a1 - mv a1, a0 - vsetvli t0, zero, e8, MX(), ta, ma # vlen - bltu a2, t0, 2f # len < vlen - # align dest to vlen - sub t1, zero, a0 - remu t1, t1, t0 
# align = (-dest) % vlen - vsetvli t0, t1, e8, MX(), ta, ma -1: - vse8.v v8, (a1) - sub a2, a2, t0 - add a1, a1, t0 -2: - vsetvli t0, a2, e8, MX(), ta, ma - bnez a2, 1b - ret - -.global MX(memset_rvv_tail_) -MX(memset_rvv_tail_): - vsetvli t0, a2, e8, MX(), ta, ma - vmv.v.x v8, a1 - remu a3, a2, t0 # tail = n % vlenb - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n - mv a2, a0 # n = dest -1: - vse8.v v8, (a2) - add a2, a2, t0 # dest += vlenb - bltu a2, a4, 1b # dest < end - # handle tail - vsetvli zero, a3, e8, MX(), ta, ma - vse8.v v8, (a2) - ret - -.global MX(memset_rvv_tail_4x_) -MX(memset_rvv_tail_4x_): - vsetvli t0, a2, e8, MX(), ta, ma - vmv.v.x v8, a1 - slli t1, t0, 2 - mv a5, a0 - mv a3, a2 - bltu a2, t1, 2f - remu a3, a2, t1 # tail = n % (vlenb*4) - sub a2, a2, a3 # n -= tail - add a4, a0, a2 # end = dest + n -1: - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - vse8.v v8, (a5) - add a5, a5, t0 # dest += vlenb - bltu a5, a4, 1b # dest < end - # handle tail -2: - vsetvli a4, a3, e8, MX(), ta, ma - vse8.v v8, (a5) - sub a3, a3, a4 - add a5, a5, a4 - bnez a3, 2b - ret - -#endif diff --git a/tests/rvv_bench/memset/memset.c b/tests/rvv_bench/memset/memset.c deleted file mode 100644 index 9b2f7c463..000000000 --- a/tests/rvv_bench/memset/memset.c +++ /dev/null @@ -1,163 +0,0 @@ -#include "bench.h" - -void * -memset_scalar(void *dest, int c, size_t n) -{ - unsigned char *d = dest; - while (n--) *d++ = c, BENCH_CLOBBER(); - return dest; -} - -void * -memset_scalar_autovec(void *dest, int c, size_t n) -{ - unsigned char *d = dest; - while (n--) *d++ = c; - return dest; -} - -/* https://git.musl-libc.org/cgit/musl/tree/src/string/memset.c */ -#if __riscv_xlen >= 64 -void * -memset_musl(void *dest, int c, size_t n) -{ - unsigned char *s = dest; - size_t k; - - /* Fill head and tail with minimal branching. 
Each - * conditional ensures that all the subsequently used - * offsets are well-defined and in the dest region. */ - - if (!n) return dest; - s[0] = c; - s[n-1] = c; - if (n <= 2) return dest; - s[1] = c; - s[2] = c; - s[n-2] = c; - s[n-3] = c; - if (n <= 6) return dest; - s[3] = c; - s[n-4] = c; - if (n <= 8) return dest; - - /* Advance pointer to align it at a 4-byte boundary, - * and truncate n to a multiple of 4. The previous code - * already took care of any head/tail that get cut off - * by the alignment. */ - - k = -(uintptr_t)s & 3; - s += k; - n -= k; - n &= -4; - -#ifdef __GNUC__ - typedef uint32_t __attribute__((__may_alias__)) u32; - typedef uint64_t __attribute__((__may_alias__)) u64; - - u32 c32 = ((u32)-1)/255 * (unsigned char)c; - - /* In preparation to copy 32 bytes at a time, aligned on - * an 8-byte bounary, fill head/tail up to 28 bytes each. - * As in the initial byte-based head/tail fill, each - * conditional below ensures that the subsequent offsets - * are valid (e.g. !(n<=24) implies n>=28). */ - - *(u32 *)(s+0) = c32; - *(u32 *)(s+n-4) = c32; - if (n <= 8) return dest; - *(u32 *)(s+4) = c32; - *(u32 *)(s+8) = c32; - *(u32 *)(s+n-12) = c32; - *(u32 *)(s+n-8) = c32; - if (n <= 24) return dest; - *(u32 *)(s+12) = c32; - *(u32 *)(s+16) = c32; - *(u32 *)(s+20) = c32; - *(u32 *)(s+24) = c32; - *(u32 *)(s+n-28) = c32; - *(u32 *)(s+n-24) = c32; - *(u32 *)(s+n-20) = c32; - *(u32 *)(s+n-16) = c32; - - /* Align to a multiple of 8 so we can fill 64 bits at a time, - * and avoid writing the same bytes twice as much as is - * practical without introducing additional branching. */ - - k = 24 + ((uintptr_t)s & 4); - s += k; - n -= k; - - /* If this loop is reached, 28 tail bytes have already been - * filled, so any remainder when n drops below 32 can be - * safely ignored. 
*/ - - u64 c64 = c32 | ((u64)c32 << 32); - for (; n >= 32; n-=32, s+=32) { - *(u64 *)(s+0) = c64; - *(u64 *)(s+8) = c64; - *(u64 *)(s+16) = c64; - *(u64 *)(s+24) = c64; - } -#else - /* Pure C fallback with no aliasing violations. */ - while (n--) *s++ = c; -#endif - - return dest; -} -#endif - -#define memset_libc memset - -#define IMPLS(f) \ - IFHOSTED(f(libc)) \ - IF64(f(musl)) \ - f(scalar) \ - f(scalar_autovec) \ - MX(f, rvv) \ - MX(f, rvv_align) \ - MX(f, rvv_tail) \ - MX(f, rvv_tail_4x) \ - -typedef void *Func(void *dest, int c, size_t n); - -#define DECLARE(f) extern Func memset_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &memset_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -uint8_t *dest; -ux last; -char c; - -void init(void) { c = urand(); } - -ux checksum(size_t n) { - ux sum = last; - for (size_t i = 0; i < n+9; ++i) - sum = uhash(sum) + dest[i]; - return sum; -} - -void common(size_t n, size_t off) { - dest = mem + off; - memset(dest, c+3, n+9); -} - -BENCH(base) { - common(n, urand() & 511); - TIME last = (uintptr_t)f(dest, c, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0); - TIME last = (uintptr_t)f(dest, c, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM - 521, "memset", bench_base }, - { MAX_MEM - 521, "memset aligned", bench_aligned} -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/mergelines/mergelines.S b/tests/rvv_bench/mergelines/mergelines.S deleted file mode 100644 index 051a0d7de..000000000 --- a/tests/rvv_bench/mergelines/mergelines.S +++ /dev/null @@ -1,179 +0,0 @@ -#if 0 -size_t -mergelines_rvv(char *str, size_t len) -{ - uint8_t *dest = (uint8_t*)str; - uint8_t *src = (uint8_t*)str; - char last = 0; - - vuint8m8_t v, u, d; - vbool1_t m; - - for (size_t vl, VL; len > 1; ) { - VL = vl = __riscv_vsetvl_e8m8(len); - - char next = len > vl ? 
src[vl] : 0; - v = __riscv_vle8_v_u8m8(src, vl); - u = __riscv_vslide1up_vx_u8m8(v, last, vl); - d = __riscv_vslide1down_vx_u8m8(v, next, vl); - - m = __riscv_vmor_mm_b1(__riscv_vmsne_vx_u8m8_b1(u, '\\', vl), __riscv_vmsne_vx_u8m8_b1(v, '\n', vl), vl); - #if DO_SKIP - if (likely(__riscv_vcpop_m_b1(m, vl) == vl && next != '\n')) - goto skip; - #endif - m = __riscv_vmand_mm_b1( - m, - __riscv_vmor_mm_b1(__riscv_vmsne_vx_u8m8_b1(v, '\\', vl), __riscv_vmsne_vx_u8m8_b1(d, '\n', vl), vl), - vl); - - v = __riscv_vcompress_vm_u8m8(v, m, vl); - vl = __riscv_vcpop_m_b1(m, vl); - skip: - __riscv_vse8_v_u8m8(dest, v, vl); - dest += vl; src += VL; len -= VL; - last = src[-1]; - } - - if (len > 0 && !(last == '\\' && *src == '\n')) *dest++ = *src++; - return (dest - (uint8_t*)str); -} -#endif - -#ifdef MX - -.global MX(mergelines_rvv_) # generated by clang -MX(mergelines_rvv_): - li a2, 2 - bltu a1, a2, MX(rvv_6) - li t0, 0 - li a7, 92 - li a6, 1 - mv a2, a0 - mv a4, a0 - j MX(rvv_4) -MX(rvv_2): # in Loop: Header=BB0_4 Depth=1 - add a3, a4, a5 - lbu t1, 0(a3) -MX(rvv_3): # in Loop: Header=BB0_4 Depth=1 - vle8.v v8, (a4) - add a3, a4, a5 - vslide1up.vx v16, v8, t0 - vslide1down.vx v24, v8, t1 - vmsne.vx v0, v16, a7 - vmsne.vi v16, v8, 10 - vmor.mm v16, v0, v16 - vmsne.vx v17, v8, a7 - vmsne.vi v18, v24, 10 - vmor.mm v17, v17, v18 - vmand.mm v16, v16, v17 - vcompress.vm v24, v8, v16 - vcpop.m a4, v16 - vsetvli zero, a4, e8, MX(), ta, ma - vse8.v v24, (a2) - lbu t0, -1(a3) - sub a1, a1, a5 - add a2, a2, a4 - mv a4, a3 - bgeu a6, a1, MX(rvv_8) -MX(rvv_4): # =>This Inner Loop Header: Depth=1 - vsetvli a5, a1, e8, MX(), ta, ma - bltu a5, a1, MX(rvv_2) - li t1, 0 - j MX(rvv_3) -MX(rvv_6): - mv a2, a0 - beqz a1, MX(rvv_10) - lbu a1, 0(a0) - mv a2, a0 - j MX(rvv_11) -MX(rvv_8): - beqz a1, MX(rvv_10) - lbu a1, 0(a3) - xori a3, t0, 92 - xori a4, a1, 10 - or a3, a3, a4 - bnez a3, MX(rvv_11) -MX(rvv_10): - sub a0, a2, a0 - ret -MX(rvv_11): - addi a3, a2, 1 - sb a1, 0(a2) - sub a0, a3, a0 - 
ret - - -.global MX(mergelines_rvv_skip_) # generated by clang -MX(mergelines_rvv_skip_): - li a2, 2 - bltu a1, a2, MX(rvv_skip_9) - li a5, 0 - li a6, 92 - li a7, 1 - mv t1, a0 - mv a3, a0 -MX(rvv_skip_2): # =>This Inner Loop Header: Depth=1 - vsetvli a4, a1, e8, MX(), ta, ma - bgeu a4, a1, MX(rvv_skip_4) - add a2, a3, a4 - lbu t0, 0(a2) - j MX(rvv_skip_5) -MX(rvv_skip_4): # in Loop: Header=BB0_2 Depth=1 - li t0, 0 -MX(rvv_skip_5): # in Loop: Header=BB0_2 Depth=1 - vle8.v v8, (a3) - vslide1up.vx v16, v8, a5 - vmsne.vx v24, v16, a6 - vmsne.vi v16, v8, 10 - vmor.mm v16, v24, v16 - vcpop.m a2, v16 - xor a2, a2, a4 - seqz a2, a2 - addi a5, t0, -10 - snez a5, a5 - and a2, a2, a5 - beqz a2, MX(rvv_skip_8) - mv a2, a4 -MX(rvv_skip_7): # in Loop: Header=BB0_2 Depth=1 - add a3, a3, a4 - vsetvli zero, a2, e8, MX(), ta, ma - vse8.v v8, (t1) - lbu a5, -1(a3) - sub a1, a1, a4 - add t1, t1, a2 - bltu a7, a1, MX(rvv_skip_2) - j MX(rvv_skip_11) -MX(rvv_skip_8): # in Loop: Header=BB0_2 Depth=1 - vslide1down.vx v24, v8, t0 - vmsne.vx v17, v8, a6 - vmsne.vi v18, v24, 10 - vmor.mm v17, v17, v18 - vmand.mm v16, v16, v17 - vcompress.vm v24, v8, v16 - vcpop.m a2, v16 - vmv.v.v v8, v24 - j MX(rvv_skip_7) -MX(rvv_skip_9): - mv t1, a0 - beqz a1, MX(rvv_skip_13) - lbu a1, 0(a0) - mv t1, a0 - j MX(rvv_skip_14) -MX(rvv_skip_11): - beqz a1, MX(rvv_skip_13) - lbu a1, 0(a3) - xori a2, a5, 92 - xori a3, a1, 10 - or a2, a2, a3 - bnez a2, MX(rvv_skip_14) -MX(rvv_skip_13): - sub a0, t1, a0 - ret -MX(rvv_skip_14): - addi a2, t1, 1 - sb a1, 0(t1) - sub a0, a2, a0 - ret - -#endif diff --git a/tests/rvv_bench/mergelines/mergelines.c b/tests/rvv_bench/mergelines/mergelines.c deleted file mode 100644 index 2d1d2078d..000000000 --- a/tests/rvv_bench/mergelines/mergelines.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "bench.h" - -size_t -mergelines_scalar(char *str, size_t len) -{ - char *dest = str; - char *src = str; - - while (len > 1) { - if (src[0] == '\\' && src[1] == '\n') - src += 2, len -= 2; - else - 
*dest++ = *src++, --len; - BENCH_CLOBBER(); - } - if (len > 0) - *dest++ = *src++; - return dest - str; -} - -#define IMPLS(f) \ - MX(f, rvv) \ - f(scalar) \ - MX(f, rvv_skip) \ - -typedef size_t Func(char *buf, size_t len); - -#define DECLARE(f) extern Func mergelines_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &mergelines_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -char *str; -ux last; - -void init(void) { } -ux checksum(size_t n) { return last; } - -void common(size_t n, char const *chars, size_t nChars) { - str = (char*)mem + (urand() & 255); - for (size_t i = 0; i < n; ++i) - str[i] = chars[urand() % nChars]; -} - -BENCH(2_3) { - common(n, "\\\na", 3); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_16) { - common(n, "\\\nabcdefgh", 16); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_32) { - common(n, "\\\nabcdefgh123456789", 32); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(2_256) { - str = (char*)mem + (urand() & 255); - for (size_t i = 0; i < n; ++i) - str[i] = urand() & 0xff; - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -#define COUNT SCALE_mergelines(MAX_MEM) - 256 -Bench benches[] = { - { COUNT, "mergelines 2/3", bench_2_3 }, - { COUNT, "mergelines 2/16", bench_2_16 }, - { COUNT, "mergelines 2/32", bench_2_32 }, - { COUNT, "mergelines 2/256", bench_2_256 } -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/poly1305/poly1305.S b/tests/rvv_bench/poly1305/poly1305.S deleted file mode 100644 index e5b332e02..000000000 --- a/tests/rvv_bench/poly1305/poly1305.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef MX -#if __riscv_xlen >= 64 -#include "rvv-chacha-poly/vpoly.s" -#endif -#endif diff --git a/tests/rvv_bench/poly1305/poly1305.c b/tests/rvv_bench/poly1305/poly1305.c deleted file mode 100644 index 72849ac75..000000000 --- a/tests/rvv_bench/poly1305/poly1305.c +++ /dev/null @@ -1,64 +0,0 @@ -#include "bench.h" -#if __riscv_xlen >= 64 -#include "thirdparty/boring.h" - -uint8_t *src; -uint8_t key[32], 
sig[16]; - -extern uint64_t -vector_poly1305(const uint8_t* in, size_t len, - const uint8_t key[32], uint8_t sig[16]); - -static void -poly1305_boring(void const *src, size_t n) { - poly1305_state state; - boring_poly1305_init(&state, key); - boring_poly1305_update(&state, src, n); - boring_poly1305_finish(&state, sig); -} - -static void -poly1305_rvv(void const *src, size_t n) { - vector_poly1305(src, n, key, sig); -} - -typedef void *Func(void const *src, size_t n); - -Impl impls[] = { - { "boring", &poly1305_boring }, -#if HAS_E64 - { "rvv", &poly1305_rvv }, -#endif -}; - -void init(void) { - memrand(key, sizeof key); - memrand(sig, sizeof sig); -} - -ux checksum(size_t n) { - ux sum = 0; - for (size_t i = 0; i < ARR_LEN(sig); ++i) - sum = uhash(sum) + sig[i]; - return sum; -} - -BENCH(aligned) { - for (size_t i = 0; i < 256; ++i) - mem[urand()%n] = urand(); - n = (15+n) & -16; - TIME f(mem, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM, "poly1305 aligned", bench_aligned } -}; BENCH_MAIN(impls, benches) - - -#include "../thirdparty/boring.c" -#else -void init(void) {} -Impl impls[] = {}; -Bench benches[] = {}; -BENCH_MAIN(impls, benches) -#endif diff --git a/tests/rvv_bench/strlen/strlen.S b/tests/rvv_bench/strlen/strlen.S deleted file mode 100644 index d639e5a80..000000000 --- a/tests/rvv_bench/strlen/strlen.S +++ /dev/null @@ -1,91 +0,0 @@ -#if 0 -size_t strlen_rvv(char *src) { - size_t vlmax = __riscv_vsetvlmax_e8m8(); - char *p = src; - long first = -1; - size_t vl; - while (first < 0) { - vuint8m8_t v = __riscv_vle8ff_v_u8m8((uint8_t*)p, &vl, vlmax); - first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl); - p += vl; - } - p -= vl - first; - return (size_t)(p - src); -} - -#define PAGE_SIZE 4096 -size_t strlen_rvv_page_aligned_(char *src) { - char *p = src; - long first = 0; - - size_t n = 0 - ((uintptr_t)src | -4096); - size_t vl; - for (; n > 0; n -= vl) { - vl = __riscv_vsetvl_e8m8(n); - vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, 
vl); - first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl); - p += vl; - if (first >= 0) { - goto end; - } - } - vl = __riscv_vsetvlmax_e8m8(); - do { - vuint8m8_t v = __riscv_vle8_v_u8m8((uint8_t*)p, vl); - first = __riscv_vfirst_m_b1(__riscv_vmseq_vx_u8m8_b1(v, 0, vl), vl); - p += vl; - } while (first < 0); -end: - p -= vl - first; - return (size_t)(p - src); -} -#endif - - -#ifdef MX - -.global MX(strlen_rvv_) -MX(strlen_rvv_): - mv a3, a0 -1: - vsetvli a1, x0, e8, MX(), ta, ma - vle8ff.v v8, (a3) - csrr a1, vl - vmseq.vi v0, v8, 0 - vfirst.m a2, v0 - add a3, a3, a1 # end += vl - bltz a2, 1b - add a0, a0, a1 # start += vl - add a3, a3, a2 # end += idx - sub a0, a3, a0 # start - end - ret - -.global MX(strlen_rvv_page_aligned_) # generated by clang -MX(strlen_rvv_page_aligned_): - lui a1, 1048575 - or a1, a1, a0 - neg a4, a1 - mv a1, a0 -1: - vsetvli a2, a4, e8, MX(), ta, ma - vle8.v v8, (a1) - vmseq.vi v16, v8, 0 - vfirst.m a3, v16 - add a1, a1, a2 - bgez a3, 1f - sub a4, a4, a2 - bnez a4, 1b - vsetvli a2, zero, e8, MX(), ta, ma -2: - vle8.v v8, (a1) - vmseq.vi v16, v8, 0 - vfirst.m a3, v16 - add a1, a1, a2 - bltz a3, 2b -1: - sub a1, a1, a2 - sub a0, a3, a0 - add a0, a0, a1 - ret - -#endif diff --git a/tests/rvv_bench/strlen/strlen.c b/tests/rvv_bench/strlen/strlen.c deleted file mode 100644 index 709e84b6f..000000000 --- a/tests/rvv_bench/strlen/strlen.c +++ /dev/null @@ -1,76 +0,0 @@ -#include "bench.h" - -size_t -strlen_scalar(char const *s) -{ - char const *a = s; - while (*s) ++s, BENCH_CLOBBER(); - return s - a; -} - -size_t -strlen_scalar_autovec(char const *s) -{ - char const *a = s; - while (*s) ++s; - return s - a; -} - -/* https://git.musl-libc.org/cgit/musl/tree/src/string/strlen.c */ -#define ONES ((size_t)-1/UCHAR_MAX) -#define HIGHS (ONES * (UCHAR_MAX/2+1)) -#define HASZERO(x) (((x)-ONES) & ~(x) & HIGHS) -size_t -strlen_musl(char const *s) -{ - char const *a = s; -#ifdef __GNUC__ - typedef size_t __attribute__((__may_alias__)) 
word; - word const *w; - for (; (uintptr_t)s % sizeof *w; s++) if (!*s) return s-a; - for (w = (void const*)s; !HASZERO(*w); w++); - s = (void const*)w; -#endif - for (; *s; s++); - return s-a; -} - -#define strlen_libc strlen - -#define IMPLS(f) \ - f(scalar) \ - f(scalar_autovec) \ - IFHOSTED(f(libc)) \ - f(musl) \ - MX(f, rvv_page_aligned) \ - MX(f, rvv) \ - - -typedef size_t Func(char const *s); - -#define DECLARE(f) extern Func strlen_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &strlen_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -ux last; - -void init(void) { - for (size_t i = 0; i < MAX_MEM; ++i) - mem[i] += !mem[i]; // remove null bytes -} - -ux checksum(size_t n) { return last; } - -BENCH(base) { - char *p = (char*)mem + (urand() % 511); - p[n] = 0; - TIME last = f(p); - p[n] = urand() | 1; -} BENCH_END - -Bench benches[] = { - { MAX_MEM - 521, "strlen", bench_base }, -}; BENCH_MAIN(impls, benches) - diff --git a/tests/rvv_bench/t1_runtime.patch b/tests/rvv_bench/t1_runtime.patch new file mode 100644 index 000000000..bf07e1f1a --- /dev/null +++ b/tests/rvv_bench/t1_runtime.patch @@ -0,0 +1,102 @@ +diff --git a/bench/bench.h b/bench/bench.h +index af1c839..661e8df 100644 +--- a/bench/bench.h ++++ b/bench/bench.h +@@ -120,45 +120,11 @@ static void + bench_run(Bench *benches, size_t nBenches) + { + for (Bench *b = benches; b != benches + nBenches; ++b) { +- print("{\ntitle: \"")(s,b->name)("\",\n"); +- print("labels: [\"0\","); +- for (size_t i = 0; i < b->nImpls; ++i) +- print("\"")(s,b->impls[i].name)("\","); +- print("],\n"); +- + size_t N = b->N; +- print("data: [\n["); +- for (size_t n = 1; n < N; n = BENCH_NEXT(n)) +- print(u,n)(","); +- print("],\n")(flush,); + + for (Impl *i = b->impls; i != b->impls + b->nImpls; ++i) { +- print("["); +- for (size_t n = 1; n < N; n = BENCH_NEXT(n)) { +- ux si = 0, s0 = 0; +- +-#if VALIDATE +- if (i != b->impls) { +- URand seed = randState; +- (void)b->func(i->func, n); +- si = checksum(n); +- +- randState = seed; 
+- (void)b->func(b->impls[0].func, n); +- s0 = checksum(n); +- } +- +- if (si != s0) { +- print("ERROR: ")(s,i->name)(" in ")(s,b->name)(" at ")(u,n)(flush,); +- exit(EXIT_FAILURE); +- } +-#endif +- +- print(f,bench_time(n, *i, *b))(",")(flush,); +- } +- print("],\n")(flush,); ++ bench_time(N, *i, *b); + } +- print("]\n},\n"); + } + } + +diff --git a/bench/config.h b/bench/config.h +index 0078049..a7a8cf6 100644 +--- a/bench/config.h ++++ b/bench/config.h +@@ -3,14 +3,14 @@ + #define HAS_F16 0 + + /* the maximum number of bytes to allocate, minimum of 4096 */ +-#define MAX_MEM (1024*1024*32) ++#define MAX_MEM (1024*4) + /* the byte count for the next run */ + #define NEXT(c) (c + c/7 + 3) + + /* minimum number of repeats, to sample median from */ +-#define MIN_REPEATS 10 ++#define MIN_REPEATS 1 + /* maxium number of repeats, executed until more than STOP_TIME has elapsed */ +-#define MAX_REPEATS 64 ++#define MAX_REPEATS 1 + + /* stop repeats early afer this many cycles have elapsed */ + #define STOP_CYCLES (1024*1024*500) +diff --git a/nolibc.h b/nolibc.h +index 94d4235..06f2c0f 100644 +--- a/nolibc.h ++++ b/nolibc.h +@@ -64,7 +64,7 @@ memread(void *ptr, size_t len) + return fread(ptr, 1, len, stdin); + } + #ifndef ENABLE_RDCYCLE_HACK +-int main(void) { ++int test(void) { + int x = nolibc_main(); + print_flush(); + exit(x); +@@ -158,13 +158,8 @@ void _start(void) { + static inline ux + rv_cycles(void) + { +- ux cycle; +-#ifdef READ_MCYCLE +- __asm volatile ("csrr %0, mcycle" : "=r"(cycle)); +-#else +- __asm volatile ("csrr %0, cycle" : "=r"(cycle)); +-#endif +- return cycle; ++ // TODO: support cycle ++ return 0; + } + + diff --git a/tests/rvv_bench/utf8_count/utf8_count.S b/tests/rvv_bench/utf8_count/utf8_count.S deleted file mode 100644 index 41a079693..000000000 --- a/tests/rvv_bench/utf8_count/utf8_count.S +++ /dev/null @@ -1,213 +0,0 @@ -#if 0 -size_t utf8_count_rvv(char const *buf, size_t len) { - size_t sum = 0; - for (size_t vl; len > 0; len -= vl, buf += 
vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl); - vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); - sum += __riscv_vcpop_m_b1(mask, vl); - } - return sum; -} -#endif - -#ifdef MX - -.global MX(utf8_count_rvv_) -MX(utf8_count_rvv_): - li a2, 0 - li a3, -65 -1: - vsetvli a4, a1, e8, MX(), ta, ma - vle8.v v8, (a0) - vmsgt.vx v16, v8, a3 - vcpop.m a5, v16 - add a2, a2, a5 - sub a1, a1, a4 - add a0, a0, a4 - bnez a1, 1b - mv a0, a2 - ret - -.global MX(utf8_count_rvv_align_) -MX(utf8_count_rvv_align_): - mv a2, a0 - li a0, 0 - li a3, -65 - vsetvli t0, zero, e8, MX(), ta, ma # vlen - bltu a1, t0, 2f # len < vlen - # align dest to vlen - sub t1, zero, a2 - remu t1, t1, t0 # align = (-dest) % vlen - vsetvli t0, t1, e8, MX(), ta, ma -1: - vle8.v v8,(a2) - vmsgt.vx v16, v8, a3 - vcpop.m a4, v16 - add a0, a0, a4 - sub a1, a1, t0 - add a2, a2, t0 -2: - vsetvli t0, a1, e8, MX(), ta, ma - bnez a1, 1b - ret - -.global MX(utf8_count_rvv_tail_) -MX(utf8_count_rvv_tail_): - vsetvli t0, a1, e8, MX(), ta, ma - remu a2, a1, t0 # tail = n % vlenb - sub a1, a1, a2 # n -= tail - add a3, a0, a1 # end = dest + n - mv a1, a0 # n = dest - li a0, 0 - li t1, -65 -1: - vle8.v v8, (a1) - vmsgt.vx v16, v8, t1 - vcpop.m t2, v16 - add a0, a0, t2 - add a1, a1, t0 # src += vlenb - bltu a1, a3, 1b # dest < end - # copy tail - vsetvli zero, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vmsgt.vx v16, v8, t1 - vcpop.m t2, v16 - add a0, a0, t2 - ret - -# this is supposed to test how well the implementation handles -# operations with an vl smaller than VLMAX -.global MX(utf8_count_rvv_128_) -MX(utf8_count_rvv_128_): - li t0, 128/8 - bgt a1, t0, 1f - mv t0, a1 -1: - vsetvli t0, t0, e8, MX(), ta, ma - remu a2, a1, t0 # tail = n % vlenb - sub a1, a1, a2 # n -= tail - add a3, a0, a1 # end = dest + n - mv a1, a0 # n = dest - li a0, 0 - li t1, -65 -1: - vle8.v v8, (a1) - vmsgt.vx v16, v8, t1 - vcpop.m t2, v16 - add a0, a0, t2 - add a1, a1, t0 # src += vlenb - bltu a1, 
a3, 1b # dest < end - # copy tail - vsetvli zero, a2, e8, MX(), ta, ma - vle8.v v8, (a1) - vmsgt.vx v16, v8, t1 - vcpop.m t2, v16 - add a0, a0, t2 - ret - - -.global MX(utf8_count_rvv_4x_) -MX(utf8_count_rvv_4x_): - mv a2, a0 - li a0, 0 - li a6, -65 -1: - vsetvli a4, a1, e8, MX(), ta, ma - vle8.v v8, (a2) - vmsgt.vx v16, v8, a6 - vcpop.m a7, v16 - sub a1, a1, a4 - add a2, a2, a4 - vsetvli a4, a1, e8, MX(), ta, ma - vle8.v v8, (a2) - vmsgt.vx v16, v8, a6 - vcpop.m a3, v16 - sub a1, a1, a4 - add a2, a2, a4 - vsetvli a4, a1, e8, MX(), ta, ma - vle8.v v8, (a2) - vmsgt.vx v16, v8, a6 - vcpop.m a5, v16 - sub a1, a1, a4 - add a2, a2, a4 - vsetvli a4, a1, e8, MX(), ta, ma - vle8.v v8, (a2) - add a0, a0, a7 - add a0, a0, a3 - add a0, a0, a5 - vmsgt.vx v16, v8, a6 - vcpop.m a3, v16 - add a0, a0, a3 - sub a1, a1, a4 - add a2, a2, a4 - bnez a1, 1b - ret - -// gcc generated from unrolled intrinsics implementation: -// https://godbolt.org/z/q75c6r3Ta -.global MX(utf8_count_rvv_4x_tail_) -MX(utf8_count_rvv_4x_tail_): - vsetvli a5, zero, e8, MX(), ta, ma - slli t3, a5, 2 - add a1, a0, a1 - add a2, a0, t3 - mv a4, a0 - bltu a1, a2, 5f - slli t4, a5, 1 - add t5, t4, a5 - li a0, 0 - li a6, -65 -1: - add a3, a5, a4 - vsetvli zero, zero, e8, MX(), ta, ma - add a7, t4, a4 - vle8.v v8, (a4) - vle8.v v16, (a3) - vmsgt.vx v8, v8, a6 - vmsgt.vx v16, v16, a6 - vcpop.m a3, v8 - vcpop.m t1, v16 - add a3, a3, t1 - vle8.v v8, (a7) - add a4, t5, a4 - vmsgt.vx v8, v8, a6 - vcpop.m a7, v8 - add a3, a3, a7 - vle8.v v8, (a4) - mv a4, a2 - vmsgt.vx v8, v8, a6 - add a2, a2, t3 - vcpop.m a7, v8 - add a3, a3, a7 - add a0, a0, a3 - bgeu a1, a2, 1b -2: - sub a3, a1, a4 - beq a1, a4, 4f - li a2, 0 - li a1, -65 -3: - vsetvli a5, a3, e8, MX(), ta, ma - sub a3, a3, a5 - vle8.v v8, (a4) - add a4, a4, a5 - vmsgt.vx v8, v8, a1 - vcpop.m a5, v8 - add a2, a2, a5 - bne a3, zero, 3b - add a0, a0, a2 -4: - ret -5: - li a0, 0 - j 2b - - - - -#endif - - - - diff --git a/tests/rvv_bench/utf8_count/utf8_count.c 
b/tests/rvv_bench/utf8_count/utf8_count.c deleted file mode 100644 index ebe2e678c..000000000 --- a/tests/rvv_bench/utf8_count/utf8_count.c +++ /dev/null @@ -1,135 +0,0 @@ -#include "bench.h" - -size_t -utf8_count_scalar(char const *str, size_t len) -{ - uint8_t const *p = (uint8_t const*)str; - size_t count = 0; - while (len--) count += (*p++ & 0xc0) != 0x80, BENCH_CLOBBER(); - return count; -} - -size_t -utf8_count_scalar_autovec(char const *str, size_t len) -{ - uint8_t const *p = (uint8_t const*)str; - size_t count = 0; - while (len--) count += (*p++ & 0xc0) != 0x80; - return count; -} - -#define GEN_SWAR(name, popc, clobber) \ - size_t \ - utf8_count_##name(char const *str, size_t len) \ - { \ - ux const BENCH_MAY_ALIAS *u; \ - size_t count = 0, tail = 0; \ -\ - uint8_t const *u8 = (uint8_t const*)str; \ - if (len < sizeof *u) { \ - tail = len; \ - goto skip; \ - } \ -\ - tail = sizeof *u - (uintptr_t)str % sizeof *u; \ -\ - len -= tail; \ - while (tail--) \ - count += (*u8++ & 0xC0) != 0x80, clobber; \ -\ - u = (ux const*)u8; \ - tail = len % sizeof *u; \ -\ - for (len /= sizeof *u; len--; ++u) { \ - ux b1 = ~*u & (ux)0x8080808080808080; \ - ux b2 = *u & (ux)0x4040404040404040; \ - count += popc((b1 >> 1) | b2); \ - clobber; \ - } \ -\ - u8 = (uint8_t const*)u; \ - skip: \ - while (tail--) \ - count += (*u8++ & 0xC0) != 0x80, clobber; \ - return count; \ - } - -#if __riscv_zbb -GEN_SWAR(SWAR_popc,__builtin_popcountll,BENCH_CLOBBER()) -GEN_SWAR(SWAR_popc_autovec,__builtin_popcountll,(void)0) -# define POPC(f) f(SWAR_popc) f(SWAR_popc_autovec) -#else -# define POPC(f) -#endif - -static inline int -upopcnt(ux x) -{ - /* 2-bit sums */ - x -= (x >> 1) & (-(ux)1/3); - /* 4-bit sums */ - x = (x & (-(ux)1/15*3)) + ((x >> 2) & (-(ux)1/15*3)); - /* 8-bit sums */ - x = (x + (x >> 4)) & (-(ux)1/255*15); - BENCH_VOLATILE_REG(x); - /* now we can just add the sums together, because can't overflow, - * since there can't be more than 255 bits set */ - x += (x >> 8); /* 16-bit 
sums */ - x += (x >> 16); /* sum 16-bit sums */ - IF64(x += (x >> 32)); /* sum 32-bit sums */ - return x & 127; -} - - -GEN_SWAR(SWAR_popc_bithack,upopcnt,BENCH_CLOBBER()) -GEN_SWAR(SWAR_popc_bithack_autovec,upopcnt,(void)0) - - -#define IMPLS(f) \ - MX(f, rvv) \ - f(scalar) \ - f(scalar_autovec) \ - POPC(f) \ - f(SWAR_popc_bithack) \ - f(SWAR_popc_bithack_autovec) \ - MX(f, rvv_align) \ - MX(f, rvv_tail) \ - MX(f, rvv_128) \ - MX(f, rvv_4x) \ - MX(f, rvv_4x_tail) \ - -typedef size_t Func(char const *str, size_t len); - -#define DECLARE(f) extern Func utf8_count_##f; -IMPLS(DECLARE) - -#define EXTRACT(f) { #f, &utf8_count_##f }, -Impl impls[] = { IMPLS(EXTRACT) }; - -char *str; -ux last; - -void init(void) { } -ux checksum(size_t n) { return last; } - -void common(size_t n, size_t off) { - str = (char*)mem + off; - memrand(str, n + 9); -} - -BENCH(base) { - common(n, urand() & 511); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -BENCH(aligned) { - common(n, 0); - TIME last = (uintptr_t)f(str, n); -} BENCH_END - -Bench benches[] = { - { MAX_MEM - 521, "utf8 count", bench_base }, - { MAX_MEM - 521, "utf8 count aligned", bench_aligned } -}; BENCH_MAIN(impls, benches) - - From 02931ace5acb766f7c424083645f3084b048bfda Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 24 Jul 2024 02:22:00 +0800 Subject: [PATCH 121/140] [build system] add t1-rocketv in build system for link t1 with rocket --- build.sc | 31 +++++++++++++++++++++++++++++++ common.sc | 17 +++++++++++++++++ nix/t1/t1.nix | 1 + 3 files changed, 49 insertions(+) diff --git a/build.sc b/build.sc index 3aa7d8710..17f308090 100644 --- a/build.sc +++ b/build.sc @@ -135,6 +135,21 @@ trait RocketV def chiselIvy = None } +object t1rocket extends T1Rocket + +trait T1Rocket + extends millbuild.common.T1RocketModule + with ScalafmtModule { + def scalaVersion = T(v.scala) + def rocketModule = rocketv + def t1Module = t1 + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) 
+ def chiselPluginIvy = None + def chiselIvy = None +} + object ipemu extends IPEmulator trait IPEmulator @@ -161,6 +176,20 @@ trait RocketEmulator extends millbuild.common.RocketEmulatorModule { def chiselIvy = None } +object t1rocketemu extends T1RocketEmulator + +trait T1RocketEmulator + extends millbuild.common.T1RocketEmulatorModule { + def scalaVersion = T(v.scala) + + def t1rocketModule = t1rocket + + def chiselModule = Some(chisel) + def chiselPluginJar = T(Some(chisel.pluginModule.jar())) + def chiselPluginIvy = None + def chiselIvy = None +} + object panamaconverter extends PanamaConverter trait PanamaConverter @@ -188,6 +217,8 @@ trait Elaborator ipemu, rocketv, rocketemu, + t1rocket, + t1rocketemu, ) def mainargsIvy = v.mainargs diff --git a/common.sc b/common.sc index 7f6e5d5c5..0f39a2376 100644 --- a/common.sc +++ b/common.sc @@ -90,6 +90,16 @@ trait RocketVModule def moduleDeps = super.moduleDeps ++ Seq(axi4Module, hardfloatModule) } +// Link T1 example: RocketV+T1 +trait T1RocketModule + extends ScalaModule + with HasChisel { + def rocketModule: ScalaModule + def t1Module: ScalaModule + + def moduleDeps = super.moduleDeps ++ Seq(rocketModule, t1Module) +} + trait EmuHelperModule extends ScalaModule with HasChisel @@ -101,6 +111,13 @@ trait IPEmulatorModule def moduleDeps = super.moduleDeps ++ Seq(t1Module) } +trait T1RocketEmulatorModule + extends ScalaModule + with HasChisel { + def t1rocketModule: ScalaModule + def moduleDeps = super.moduleDeps ++ Seq(t1rocketModule) +} + trait ElaboratorModule extends ScalaModule with HasChisel { diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index aafed98a2..6e6265b2a 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -30,6 +30,7 @@ let ./../../elaborator ./../../configgen/src ./../../rocketv + ./../../t1rocketv ./../../rocketemu/src ]; }; From b88ff708fc93680813723b45235581b87f55d381 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Wed, 24 Jul 2024 10:53:41 +0800 Subject: [PATCH 122/140] [t1rocket] draft Tile - 
generate parameter json: mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile config --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w - generate verilog: mill elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile design --parameter ./T1RocketTile.json --run-firtool --- elaborator/src/t1rocket/T1RocketTile.scala | 102 ++++ t1rocket/src/T1RocketTile.scala | 543 +++++++++++++++++++++ 2 files changed, 645 insertions(+) create mode 100644 elaborator/src/t1rocket/T1RocketTile.scala create mode 100644 t1rocket/src/T1RocketTile.scala diff --git a/elaborator/src/t1rocket/T1RocketTile.scala b/elaborator/src/t1rocket/T1RocketTile.scala new file mode 100644 index 000000000..3cb8398e2 --- /dev/null +++ b/elaborator/src/t1rocket/T1RocketTile.scala @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.elaborator.t1rocketv + +import chisel3.util.BitPat +import chisel3.util.experimental.BitSet +import mainargs._ +import org.chipsalliance.t1.elaborator.Elaborator +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} + +// --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_c --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w 
+object T1RocketTile extends Elaborator { + implicit object BitSetRead extends TokensReader.Simple[BitSet] { + def shortName = "bitset" + def read(strs: Seq[String]) = { + Right( + strs.head + .split(",") + .map { opt => + if (opt.contains("-")) { + val range = opt.split("-") + require(range.size == 2) + val from = BigInt(range.head, 16) + val to = BigInt(range.last, 16) + 1 + BitSet.fromRange(from, to - from, range.head.length * 4) + } else if (opt.contains("+")) { + val range = opt.split("\\+") + require(range.size == 2) + val from = BigInt(range.head, 16) + val length = BigInt(range.last, 16) + BitSet.fromRange(from, length, range.head.length * 4) + } else { + BitPat(s"b$opt") + } + } + .reduce(_.union(_)) + ) + } + } + + implicit object RamTypeRead extends TokensReader.Simple[RamType] { + def shortName = "ramtype" + def read(strs: Seq[String]) = { + Right( + strs.head match { + case "p0rw" => p0rw + case "p0rp1w" => p0rp1w + case "p0rwp1rw" => p0rwp1rw + } + ) + } + } + + @main + case class T1RocketTileParameterMain( + @arg(name = "instructionSets") instructionSets: Seq[String], + @arg(name = "cacheBlockBytes") cacheBlockBytes: Int, + @arg(name = "nPMPs") nPMPs: Int, + @arg(name = "cacheable") cacheable: BitSet, + @arg(name = "sideEffects") sideEffects: BitSet, + @arg(name = "dcacheNSets") dcacheNSets: Int, + @arg(name = "dcacheNWays") dcacheNWays: Int, + @arg(name = "dcacheRowBits") dcacheRowBits: Int, + @arg(name = "iCacheNSets") iCacheNSets: Int, + @arg(name = "iCacheNWays") iCacheNWays: Int, + @arg(name = "iCachePrefetch") iCachePrefetch: Boolean, + @arg(name = "dLen") dLen: Int, + @arg(name = "vrfBankSize") vrfBankSize: Int, + @arg(name = "vrfRamType") vrfRamType: RamType + ) { + def convert: T1RocketTileParameter = T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + 
iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType + ) + } + + implicit def T1RocketTileParameterMainParser: ParserForClass[T1RocketTileParameterMain] = + ParserForClass[T1RocketTileParameterMain] + + @main + def config(@arg(name = "parameter") parameter: T1RocketTileParameterMain) = configImpl(parameter.convert) + + @main + def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) = + designImpl[T1RocketTile, T1RocketTileParameter](parameter, runFirtool.value) + + def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) +} diff --git a/t1rocket/src/T1RocketTile.scala b/t1rocket/src/T1RocketTile.scala new file mode 100644 index 000000000..452f76084 --- /dev/null +++ b/t1rocket/src/T1RocketTile.scala @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2024 Jiuyang Liu +package org.chipsalliance.t1.tile + +import chisel3._ +import chisel3.experimental.hierarchy.{Instance, Instantiate} +import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} +import chisel3.util.experimental.BitSet +import chisel3.util.log2Ceil +import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} +import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter} +import org.chipsalliance.rvdecoderdb.Instruction +import org.chipsalliance.t1.rtl.decoder.T1CustomInstruction +import org.chipsalliance.t1.rtl.vrf.RamType +import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} +import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, LogicParam, 
MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter} + +object T1RocketTileParameter { + implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default + .readwriter[String] + .bimap[BitSet]( + bs => bs.terms.map("b" + _.rawString).mkString("\n"), + str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) + ) + + implicit val vrfRamTypeP: upickle.default.ReadWriter[RamType] = upickle.default.ReadWriter.merge( + upickle.default.macroRW[p0rw.type], + upickle.default.macroRW[p0rp1w.type], + upickle.default.macroRW[p0rwp1rw.type] + ) + + implicit def rwP: upickle.default.ReadWriter[T1RocketTileParameter] = upickle.default.macroRW[T1RocketTileParameter] +} + +case class T1RocketTileParameter( + instructionSets: Seq[String], + cacheBlockBytes: Int, + nPMPs: Int, + cacheable: BitSet, + sideEffects: BitSet, + dcacheNSets: Int, + dcacheNWays: Int, + dcacheRowBits: Int, + iCacheNSets: Int, + iCacheNWays: Int, + iCachePrefetch: Boolean, + dLen: Int, + vrfBankSize: Int, + vrfRamType: RamType) + extends SerializableModuleParameter { + require(instructionSets.count(Seq("Zve32x", "Zve32f").contains) == 1, "at least support one Zve32x or Zve32f") + + val useAsyncReset: Boolean = false + val clockGate: Boolean = false + + val paddrBits: Int = xLen + // TODO: add S in the future + val priv: String = "m" + val hartIdLen: Int = 1 + val useBPWatch: Boolean = false + val mcontextWidth: Int = 0 + val scontextWidth: Int = 0 + val asidBits: Int = 0 + val resetVectorBits: Int = paddrBits + val nBreakpoints: Int = 0 + // TODO: set to 0 + val dtlbNSets: Int = 1 + val dtlbNWays: Int = 32 + val itlbNSets: Int = 1 + val itlbNWays: Int = 32 + val itlbNSectors: Int = 4 + val itlbNSuperpageEntries: Int = 4 + val nPTECacheEntries: Int = 9 + val nL2TLBWays: Int = 1 + val nL2TLBEntries: Int = 0 + // T1 doens't check exception. 
+ val legal: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val read: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val write: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val putPartial: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val logic: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val arithmetic: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val exec: BitSet = BitSet.fromRange(0, 1 << paddrBits) + val btbEntries: Int = 28 + val btbNMatchBits: Int = 14 + val btbUpdatesOutOfOrder: Boolean = false + val nPages: Int = 6 + val nRAS: Int = 6 + val bhtParameter: Option[BHTParameter] = Some(BHTParameter(nEntries = 512, counterLength = 1, historyLength = 8, historyBits = 3)) + // TODO: remove it + val mulDivLatency: Int = 0 + val divUnroll: Int = 1 + val divEarlyOut: Boolean = false + val divEarlyOutGranularity: Int = 1 + val mulUnroll: Int = 1 + val mulEarlyOut: Boolean = false + val sfmaLatency: Int = 3 + val dfmaLatency: Int = 4 + val divSqrt: Boolean = true + // TODO: check decoder + val flushOnFenceI: Boolean = true + val fastLoadByte: Boolean = false + val fastLoadWord: Boolean = true + val maxUncachedInFlight: Int = 1 + val separateUncachedResp: Boolean = false + + + // calculate + def usingUser: Boolean = priv.contains("u") + + def usingSupervisor: Boolean = priv.contains("s") + + def vLen: Int = instructionSets.collectFirst { + case s"zvl${vlen}b" => vlen.toInt + }.get + + // static for now + def hasBeu: Boolean = false + + def usingNMI: Boolean = false + + def usingHypervisor: Boolean = false + + def usingDataScratchpad: Boolean = false + + def nLocalInterrupts: Int = 0 + + def dcacheArbPorts: Int = 2 + + def tagECC: Option[String] = None + + def dataECC: Option[String] = None + + def pgLevelBits: Int = 10 - log2Ceil(xLen / 32) + + def instructions: Seq[Instruction] = + org.chipsalliance.rvdecoderdb + .instructions( + org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader) + ) + .filter(instruction => + ( + instructionSets ++ + // 
Four mandatory instruction sets. + Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system") + ).contains(instruction.instructionSet.name) + ) + .toSeq + .filter { + // special case for rv32 pseudo from rv64 + case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true + case i if i.pseudoFrom.isDefined => false + case _ => true + } + .sortBy(i => (i.instructionSet.name, i.name)) + + private def hasInstructionSet(setName: String): Boolean = + instructions.flatMap(_.instructionSets.map(_.name)).contains(setName) + + def usingBTB: Boolean = btbEntries > 0 + + def xLen: Int = + (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match { + case (true, true) => throw new Exception("cannot support both rv32 and rv64 together") + case (true, false) => 32 + case (false, true) => 64 + case (false, false) => throw new Exception("no basic instruction found.") + } + + def fLen: Option[Int] = + ( + hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"), + hasInstructionSet("rv_d") || hasInstructionSet("rv64_d") + ) match { + case (false, false) => None + case (true, false) => Some(32) + case (false, true) => Some(64) + case (true, true) => Some(64) + } + + def usingVM = hasInstructionSet("sfence.vma") + + def pgLevels: Int = xLen match { + case 32 => 2 + case 64 => 3 + } + + def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a") + + def usingCompressed = hasInstructionSet("rv_c") + + def minFLen: Option[Int] = + if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh")) + Some(16) + else + fLen + + def rocketParameter: RocketParameter = RocketParameter( + useAsyncReset, + clockGate, + instructionSets.toSet, + vLen, + usingUser, + hartIdLen, + nPMPs, + asidBits, + nBreakpoints, + usingBTB, + useBPWatch, + mcontextWidth, + scontextWidth, + mulDivLatency, + divUnroll, + divEarlyOut, + divEarlyOutGranularity, + mulUnroll, + mulEarlyOut, + paddrBits, + cacheBlockBytes, + hasBeu, + 
fastLoadByte, + fastLoadWord, + dcacheNSets, + flushOnFenceI, + usingT1 = true + ) + + def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + usingVM: Boolean, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNWays: Int, + dcacheNSets: Int, + dcacheRowBits: Int, + dtlbNSets: Int, + dtlbNWays: Int, + tagECC: Option[String], + dataECC: Option[String], + maxUncachedInFlight: Int, + separateUncachedResp: Boolean, + legal: BitSet, + cacheable: BitSet, + read: BitSet, + write: BitSet, + putPartial: BitSet, + logic: BitSet, + arithmetic: BitSet, + exec: BitSet, + sideEffects: BitSet + ) + + def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter( + useAsyncReset: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + cacheBlockBytes: Int, + dcacheNSets: Int, + usingVM: Boolean, + separateUncachedResp: Boolean + ) + + def ptwParameter: PTWParameter = PTWParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + xLen: Int, + fLen.getOrElse(0): Int, + paddrBits: Int, + asidBits: Int, + pgLevels: Int, + nPTECacheEntries: Int, + nL2TLBWays: Int, + nL2TLBEntries: Int, + nPMPs: Int + ) + + def frontendParameter: FrontendParameter = FrontendParameter( + useAsyncReset = useAsyncReset: Boolean, + clockGate = clockGate: Boolean, + xLen = xLen: Int, + usingAtomics = usingAtomics: Boolean, + usingDataScratchpad = usingDataScratchpad: Boolean, + usingVM = usingVM: Boolean, + usingCompressed = usingCompressed: Boolean, + usingBTB = usingBTB: Boolean, + itlbNSets = itlbNSets: Int, + itlbNWays = itlbNWays: Int, + itlbNSectors = itlbNSectors: Int, + itlbNSuperpageEntries = itlbNSuperpageEntries: Int, + blockBytes = cacheBlockBytes: Int, + iCacheNSets = iCacheNSets: Int, + iCacheNWays = iCacheNWays: Int, + iCachePrefetch = iCachePrefetch: Boolean, + btbEntries = btbEntries: Int, + btbNMatchBits = 
btbNMatchBits: Int, + btbUpdatesOutOfOrder = btbUpdatesOutOfOrder: Boolean, + nPages = nPages: Int, + nRAS = nRAS: Int, + nPMPs = nPMPs: Int, + paddrBits = paddrBits: Int, + pgLevels = pgLevels: Int, + asidBits = asidBits: Int, + bhtParameter = bhtParameter: Option[BHTParameter], + legal = legal: BitSet, + cacheable = cacheable: BitSet, + read = read: BitSet, + write = write: BitSet, + putPartial = putPartial: BitSet, + logic = logic: BitSet, + arithmetic = arithmetic: BitSet, + exec = exec: BitSet, + sideEffects = sideEffects: BitSet + ) + + def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map { + case (fLen, minFLen) => + FPUParameter( + useAsyncReset: Boolean, + clockGate: Boolean, + xLen: Int, + fLen: Int, + minFLen: Int, + sfmaLatency: Int, + dfmaLatency: Int, + divSqrt: Boolean, + hartIdLen: Int + ) + } + + val vfuInstantiateParameter = if (instructionSets.contains("Zve32f")) + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq(), + divfpModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, 
log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + ) else + VFUInstantiateParameter( + slotCount = 4, + logicModuleParameters = Seq( + (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3)) + ), + aluModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)), + (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3)) + ), + shifterModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3)) + ), + mulModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3)) + ), + divModuleParameters = Seq( + (SerializableModuleGenerator(classOf[LaneDiv], LaneDivParam(32, 1)), Seq(0, 1, 2, 3)) + ), + divfpModuleParameters = Seq(), + otherModuleParameters = + Seq(( + SerializableModuleGenerator( + classOf[OtherUnit], + OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) + ), + Seq(0, 1, 2, 3))), + floatModuleParameters = Seq() + ) + + def t1Parameter: T1Parameter = T1Parameter( + vLen = vLen, + dLen = dLen, + extensions = instructionSets.filter(Seq("Zve32x", "Zve32f").contains), + // empty for now. 
+ t1customInstructions = Seq(), + vrfBankSize = vrfBankSize, + vrfRamType = vrfRamType, + vfuInstantiateParameter = vfuInstantiateParameter + ) + + def instructionFetchParameter: AXI4BundleParameter = frontendParameter.instructionFetchParameter + + def itimParameter: Option[AXI4BundleParameter] = frontendParameter.itimParameter + + def loadStoreParameter: AXI4BundleParameter = hellaCacheParameter.loadStoreParameter + + def dtimParameter: Option[AXI4BundleParameter] = hellaCacheParameter.dtimParameter + + def t1HighBandwidthParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter + + def t1HightOutstandingParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter.copy(dataWidth = 32) +} + +class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { + val clock = Input(Clock()) + val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) + // todo: Const + val hartid = Flipped(UInt(parameter.hartIdLen.W)) + val resetVector = Input(Const(UInt(parameter.resetVectorBits.W))) + + val debug: Bool = Input(Bool()) + val mtip: Bool = Input(Bool()) + val msip: Bool = Input(Bool()) + val meip: Bool = Input(Bool()) + val seip: Option[Bool] = Option.when(parameter.usingSupervisor)(Bool()) + val lip: Vec[Bool] = Vec(parameter.nLocalInterrupts, Bool()) + val nmi = Option.when(parameter.usingNMI)(Bool()) + val nmiInterruptVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + val nmiIxceptionVector = Option.when(parameter.usingNMI)(UInt(parameter.resetVectorBits.W)) + // TODO: buserror should be handled by NMI + val buserror: Bool = Input(Bool()) + val wfi: Bool = Output(Bool()) + val halt: Bool = Output(Bool()) + + val instructionFetchAXI: AXI4ROIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter) + val itimAXI: Option[AXI4RWIrrevocable] = + parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val loadStoreAXI: 
AXI4RWIrrevocable = + org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter) + val dtimAXI: Option[AXI4RWIrrevocable] = + parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p))) + + val highBandwidthAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter) + val highOutstandingAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HightOutstandingParameter) +} + +class T1RocketTile(val parameter: T1RocketTileParameter) + extends FixedIORawModule(new T1RocketTileInterface(parameter)) + with SerializableModule[T1RocketTileParameter] { + val rocket: Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter)) + val frontend: Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter)) + val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter)) + val hellaCacheArbiter: Instance[HellaCacheArbiter] = Instantiate( + new HellaCacheArbiter(parameter.hellaCacheArbiterParameter) + ) + val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter)) + val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(fpuParameter => Instantiate(new FPU(fpuParameter))) + val t1: Instance[T1] = Instantiate(new T1(parameter.t1Parameter)) + + rocket.io.clock := io.clock + rocket.io.reset := io.reset + rocket.io.hartid := io.hartid + rocket.io.interrupts.debug := io.debug + rocket.io.interrupts.mtip := io.mtip + rocket.io.interrupts.msip := io.msip + rocket.io.interrupts.meip := io.meip + rocket.io.interrupts.seip.foreach(_ := io.seip.get) + rocket.io.interrupts.lip := io.lip + rocket.io.interrupts.nmi.foreach { nmi => + nmi.rnmi := io.nmi.get + nmi.rnmi_interrupt_vector := io.nmiInterruptVector.get + nmi.rnmi_exception_vector := io.nmiIxceptionVector.get + } + // @todo make it optional + rocket.io.buserror := io.buserror + io.wfi := rocket.io.wfi + 
io.loadStoreAXI <> hellaCache.io.loadStoreAXI + io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (io, hellaCache) => io <> hellaCache } + io.instructionFetchAXI <> frontend.io.instructionFetchAXI + io.itimAXI.zip(frontend.io.itimAXI).foreach { case (io, frontend) => io <> frontend } + // design for halt and beu, only use the halt function for now. + io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable) + .flatMap(_.map(_.valid)) + .foldLeft(false.B)(_ || _) + + // rocket core io + rocket.io.imem <> frontend.io.nonDiplomatic.cpu + hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem + rocket.io.ptw <> ptw.io.dpath + rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (core, fpu) => core <> fpu } + // match connect + t1.io.issue <> rocket.io.t1.get.issue + rocket.io.t1.get.retire <> t1.io.retire + // used by trace module + rocket.io.bpwatch := DontCare + // don't use for now, this is design for report the custom cease status. + // rocket.io.cease + // it will be used in the future w/ trace support. + rocket.io.traceStall := false.B + + // frontend io + frontend.io.clock := io.clock + frontend.io.reset := io.reset + frontend.io.resetVector := io.resetVector + ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw + + // hellacache io + hellaCache.io.clock := io.clock + hellaCache.io.reset := io.reset + ptw.io.requestor(1) <> hellaCache.io.ptw + hellaCache.io.cpu <> hellaCacheArbiter.io.mem + + // ptw io + ptw.io.clock := io.clock + ptw.io.reset := io.reset + hellaCacheArbiter.io.requestor(1) <> ptw.io.mem + + // hellacache arbiter io + hellaCacheArbiter.io.clock := io.clock + hellaCacheArbiter.io.reset := io.reset + + fpu.foreach { fpu => + fpu.io.clock := io.clock + fpu.io.reset := io.reset + // @todo: remove it from FPU. 
+ fpu.io.cp_req <> DontCare + fpu.io.cp_resp <> DontCare + } + t1.io.clock := io.clock + t1.io.reset := io.reset + io.highBandwidthAXI <> t1.io.highBandwidthLoadStorePort + io.highOutstandingAXI <> t1.io.indexedLoadStorePort +} From 871eb4e79e5179b8421fdae0f261f215140c17f6 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Sun, 4 Aug 2024 02:20:42 +0800 Subject: [PATCH 123/140] [t1rocket] draft Testbench --- elaborator/src/Main.scala | 11 +++++ t1rocketemu/src/TestBench.scala | 76 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 t1rocketemu/src/TestBench.scala diff --git a/elaborator/src/Main.scala b/elaborator/src/Main.scala index 17ee483a2..3a93c5807 100644 --- a/elaborator/src/Main.scala +++ b/elaborator/src/Main.scala @@ -7,6 +7,7 @@ import mainargs._ import org.chipsalliance.t1.rtl.T1Parameter import org.chipsalliance.rocketv.RocketTileParameter import chisel3.panamalib.option._ +import org.chipsalliance.t1.tile.T1RocketTileParameter object Main { implicit object PathRead extends TokensReader.Simple[os.Path] { @@ -74,6 +75,13 @@ object Main { def parameter: RocketTileParameter = generator.parameter } + case class T1RocketConfig( + @arg(name = "t1rocket-config", short = 'c') rocketConfig: os.Path) { + def generator = upickle.default + .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.t1.tile.T1RocketTile, org.chipsalliance.t1.tile.T1RocketTileParameter]](ujson.read(os.read(rocketConfig))) + def parameter: T1RocketTileParameter = generator.parameter + } + implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] @@ -87,6 +95,9 @@ object Main { @main def rocketemu(elaborateConfig: ElaborateConfig, rocketConfig: RocketConfig): Unit = elaborateConfig.elaborate(() => new org.chipsalliance.t1.rocketv.TestBench(rocketConfig.generator) ) + @main def t1rocketemu(elaborateConfig: ElaborateConfig, 
t1rocketConfig: T1RocketConfig): Unit = elaborateConfig.elaborate(() => + new org.chipsalliance.t1.t1rocketemu.TestBench(t1rocketConfig.generator) + ) // format: on def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala new file mode 100644 index 000000000..602e579d9 --- /dev/null +++ b/t1rocketemu/src/TestBench.scala @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 Jiuyang Liu + +package org.chipsalliance.t1.t1rocketemu + +import chisel3.experimental.{BaseModule, ExtModule, SerializableModuleGenerator} +import chisel3.util.HasExtModuleInline +import chisel3.{Bool, ImplicitClock, ImplicitReset, Module, Output, RawModule} +import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} + +class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTileParameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { + val clockGen = Module(new ExtModule with HasExtModuleInline { + + override def desiredName = "ClockGen" + setInline( + s"$desiredName.sv", + s"""module $desiredName(output reg clock, output reg reset); + | export "DPI-C" function dump_wave; + | function dump_wave(input string file); + |`ifdef VCS + | $$fsdbDumpfile(file); + | $$fsdbDumpvars("+all"); + | $$fsdbDumpon; + |`endif + |`ifdef VERILATOR + | $$dumpfile(file); + | $$dumpvars(0); + |`endif + | endfunction; + | + | import "DPI-C" context function void t1_cosim_init(); + | initial begin + | t1_cosim_init(); + | clock = 1'b0; + | reset = 1'b1; + | end + | initial #(11) reset = 1'b0; + | always #10 clock = ~clock; + |endmodule + |""".stripMargin + ) + val clock = IO(Output(Bool())) + val reset = IO(Output(Bool())) + }) + def clock = clockGen.clock.asClock + def reset = clockGen.reset + override def implicitClock = clockGen.clock.asClock + override def implicitReset = clockGen.reset + val dut: T1RocketTile with BaseModule = 
Module(generator.module()) + dut.io.clock := clock + dut.io.reset := reset + dut.io.hartid + dut.io.resetVector + dut.io.debug + dut.io.mtip + dut.io.msip + dut.io.meip + dut.io.seip + dut.io.lip + dut.io.nmi + dut.io.nmiInterruptVector + dut.io.nmiIxceptionVector + dut.io.buserror + dut.io.wfi + dut.io.halt + dut.io.instructionFetchAXI + dut.io.itimAXI + dut.io.loadStoreAXI + dut.io.dtimAXI + dut.io.dtimAXI + dut.io.highBandwidthAXI + dut.io.highOutstandingAXI +} \ No newline at end of file From 5af24d5919a9ae90328d6af019b05467c915fd28 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 4 Aug 2024 10:49:13 +0800 Subject: [PATCH 124/140] [t1rocket] fix elaborate nix develop ".#t1.elaborator.editable" -c mill -i elaborator.runMain org.chipsalliance.t1.elaborator.t1rocketv.T1RocketTile config --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w --instructionSets rv_c --- elaborator/src/Main.scala | 1 + nix/t1/t1.nix | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/elaborator/src/Main.scala b/elaborator/src/Main.scala index 3a93c5807..1e38d4f13 100644 --- a/elaborator/src/Main.scala +++ b/elaborator/src/Main.scala @@ -84,6 +84,7 @@ object Main { implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] + implicit def t1RocketConfig: ParserForClass[T1RocketConfig] = ParserForClass[T1RocketConfig] // format: off @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index 6e6265b2a..b7eda72bd 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ 
-30,7 +30,8 @@ let ./../../elaborator ./../../configgen/src ./../../rocketv - ./../../t1rocketv + ./../../t1rocket + ./../../t1rocketemu ./../../rocketemu/src ]; }; From 797d31d51dcfd0e2f28e8719e4a49ad54859f35e Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Sun, 4 Aug 2024 11:02:42 +0800 Subject: [PATCH 125/140] [t1rocket] update TestBench [t1rocket] update t1rocketemu TestBench.scala to build the config json --- t1rocketemu/src/AXI4SlaveAgent.scala | 203 +++++++++++++++++++++++++++ t1rocketemu/src/TestBench.scala | 127 +++++++++++++---- 2 files changed, 303 insertions(+), 27 deletions(-) create mode 100644 t1rocketemu/src/AXI4SlaveAgent.scala diff --git a/t1rocketemu/src/AXI4SlaveAgent.scala b/t1rocketemu/src/AXI4SlaveAgent.scala new file mode 100644 index 000000000..74da15a64 --- /dev/null +++ b/t1rocketemu/src/AXI4SlaveAgent.scala @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022-2024 Jiuyang Liu + +package org.chipsalliance.t1.t1rocketemu.dpi + +// TODO: upstream to AMBA as VIP +import chisel3._ +import chisel3.util.circt.dpi.{RawClockedVoidFunctionCall, RawUnclockedNonVoidFunctionCall} +import chisel3.util.{isPow2, log2Ceil} +import org.chipsalliance.amba.axi4.bundle.{ARChannel, ARFlowControl, AWChannel, AWFlowControl, AXI4BundleParameter, AXI4ROIrrevocableVerilog, AXI4RWIrrevocableVerilog, AXI4WOIrrevocableVerilog, BChannel, BFlowControl, RChannel, RFlowControl, WChannel, WFlowControl} + +case class AXI4SlaveAgentParameter(name: String, axiParameter: AXI4BundleParameter, outstanding: Int, readPayloadSize: Int, writePayloadSize: Int) + +class AXI4SlaveAgentInterface(parameter: AXI4SlaveAgentParameter) extends Bundle { + val clock: Clock = Input(Clock()) + val reset: Reset = Input(Reset()) + val channelId: UInt = Input(Const(UInt(64.W))) + // don't issue read DPI + val gateRead: Bool = Input(Bool()) + // don't issue write DPI + val gateWrite: Bool = Input(Bool()) + val channel = Flipped( + 
org.chipsalliance.amba.axi4.bundle.verilog.irrevocable(parameter.axiParameter) + ) +} + +class WritePayload(length: Int, dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) + // For dataWidth <= 8, align strb to u8 for a simple C-API + val strb = Vec(length, UInt(math.max(8, dataWidth / 8).W)) +} + +class ReadPayload(length: Int,dataWidth: Int) extends Bundle { + val data = Vec(length, UInt(dataWidth.W)) +} + +// consume transaction from DPI, drive RTL signal +class AXI4SlaveAgent(parameter: AXI4SlaveAgentParameter) + extends FixedIORawModule[AXI4SlaveAgentInterface](new AXI4SlaveAgentInterface(parameter)) { + dontTouch(io) + io.channel match { + case channel: AXI4RWIrrevocableVerilog => + new WriteManager(channel) + new ReadManager(channel) + case channel: AXI4ROIrrevocableVerilog => + new ReadManager(channel) + case channel: AXI4WOIrrevocableVerilog => + new WriteManager(channel) + } + + private class WriteManager( + channel: AWChannel with AWFlowControl with WChannel with WFlowControl with BChannel with BFlowControl) { + withClockAndReset(io.clock, io.reset) { + /** There is an aw in the register. */ + val awIssued = RegInit(false.B) + /** There is a w in the register. */ + val last = RegInit(false.B) + + /** memory to store the write payload + * @todo limit the payload size based on the RTL configuration. + */ + val writePayload = RegInit(0.U.asTypeOf(new WritePayload(parameter.writePayloadSize, parameter.axiParameter.dataWidth))) + /** AWID, latch at AW fire, used at B fire. 
*/ + val awid = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWID))) + val awaddr = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWADDR))) + val awlen = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLEN))) + val awsize = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWSIZE))) + val awburst = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWBURST))) + val awlock = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWLOCK))) + val awcache = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWCACHE))) + val awprot = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWPROT))) + val awqos = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWQOS))) + val awregion = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWREGION))) + val awuser = RegInit(0.U.asTypeOf(chiselTypeOf(channel.AWUSER))) + + /** index the payload, used to write [[writePayload]] */ + val writeIdx = RegInit(0.U.asTypeOf(UInt(8.W))) + val bFire = channel.BREADY && channel.BVALID + val awFire = channel.AWREADY && channel.AWVALID + val wLastFire = channel.WVALID && channel.WREADY && channel.WLAST + val awExist = channel.AWVALID || awIssued + val wExist = channel.WVALID && channel.WLAST || last + + // AW + channel.AWREADY := !awIssued || (wExist && channel.BREADY) + when(channel.AWREADY && channel.AWVALID) { + awid := channel.AWID + awaddr := channel.AWADDR + awlen := channel.AWLEN + awsize := channel.AWSIZE + awburst := channel.AWBURST + awlock := channel.AWLOCK + awcache := channel.AWCACHE + awprot := channel.AWPROT + awqos := channel.AWQOS + awregion := channel.AWREGION + awuser := channel.AWUSER + } + when(awFire ^ bFire) { + awIssued := awFire + } + + // W + val writePayloadUpdate = WireDefault(writePayload) + channel.WREADY := !last || (awExist && channel.BREADY) + when(channel.WVALID && channel.WREADY) { + writePayload.data(writeIdx) := channel.WDATA + writePayloadUpdate.data(writeIdx) := channel.WDATA + writePayload.strb(writeIdx) := channel.WSTRB.pad(writePayload.strb.getWidth) + writePayloadUpdate.strb(writeIdx) := 
channel.WSTRB.pad(writePayload.strb.getWidth) + writeIdx := writeIdx + 1.U + when(channel.WLAST) { + writeIdx := 0.U + } + } + when(wLastFire ^ bFire) { + last := wLastFire + } + + // B + channel.BVALID := awExist && wExist + channel.BID := Mux(awIssued, awid, channel.AWID) + channel.BRESP := 0.U(2.W) // OK + channel.BUSER := Mux(awIssued, awuser, channel.AWUSER) + when(channel.BVALID && channel.BREADY) { + RawClockedVoidFunctionCall(s"axi_write_${parameter.name}")( + io.clock, + when.cond && !io.gateWrite, + io.channelId, + // handle AW and W at same beat. + Mux(awIssued, awid.asTypeOf(UInt(64.W)), channel.AWID), + Mux(awIssued, awaddr.asTypeOf(UInt(64.W)), channel.AWADDR), + Mux(awIssued, awlen.asTypeOf(UInt(64.W)), channel.AWLEN), + Mux(awIssued, awsize.asTypeOf(UInt(64.W)), channel.AWSIZE), + Mux(awIssued, awburst.asTypeOf(UInt(64.W)), channel.AWBURST), + Mux(awIssued, awlock.asTypeOf(UInt(64.W)), channel.AWLOCK), + Mux(awIssued, awcache.asTypeOf(UInt(64.W)), channel.AWCACHE), + Mux(awIssued, awprot.asTypeOf(UInt(64.W)), channel.AWPROT), + Mux(awIssued, awqos.asTypeOf(UInt(64.W)), channel.AWQOS), + Mux(awIssued, awregion.asTypeOf(UInt(64.W)), channel.AWREGION), + writePayloadUpdate + ) + } + } + } + + private class ReadManager(channel: ARChannel with ARFlowControl with RChannel with RFlowControl) { + withClockAndReset(io.clock, io.reset) { + class CAMValue extends Bundle { + val arid = UInt(16.W) + val arlen = UInt(8.W) + val readPayload = new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth) + val readPayloadIndex = UInt(8.W) + val valid = Bool() + } + /** CAM to maintain order of read requests. This is maintained as FIFO. 
*/ + val cam: Vec[CAMValue] = RegInit(0.U.asTypeOf(Vec(parameter.outstanding, new CAMValue))) + require(isPow2(parameter.outstanding), "Need to handle pointers") + val arPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + val rPtr = RegInit(0.U.asTypeOf(UInt(log2Ceil(parameter.outstanding).W))) + + // AR + channel.ARREADY := !cam(arPtr).valid + when(channel.ARREADY && channel.ARVALID) { + cam(arPtr).arid := channel.ARID + cam(arPtr).arlen := channel.ARLEN + cam(arPtr).readPayload := RawUnclockedNonVoidFunctionCall(s"axi_read_${parameter.name}", new ReadPayload(parameter.readPayloadSize, parameter.axiParameter.dataWidth))( + when.cond && !io.gateRead, + io.channelId, + channel.ARID.asTypeOf(UInt(64.W)), + channel.ARADDR.asTypeOf(UInt(64.W)), + channel.ARLEN.asTypeOf(UInt(64.W)), + channel.ARSIZE.asTypeOf(UInt(64.W)), + channel.ARBURST.asTypeOf(UInt(64.W)), + channel.ARLOCK.asTypeOf(UInt(64.W)), + channel.ARCACHE.asTypeOf(UInt(64.W)), + channel.ARPROT.asTypeOf(UInt(64.W)), + channel.ARQOS.asTypeOf(UInt(64.W)), + channel.ARREGION.asTypeOf(UInt(64.W)) + ) + cam(arPtr).readPayloadIndex := 0.U + cam(arPtr).valid := true.B + arPtr := arPtr + 1.U + } + + // R + channel.RVALID := cam(rPtr).valid + channel.RID := cam(rPtr).arid + channel.RDATA := cam(rPtr).readPayload.data(cam(rPtr).readPayloadIndex) + channel.RRESP := 0.U // OK + channel.RLAST := (cam(rPtr).arlen === cam(rPtr).readPayloadIndex) && cam(rPtr).valid + channel.RUSER := DontCare + when(channel.RREADY && channel.RVALID) { + // increase index + cam(rPtr).readPayloadIndex := cam(rPtr).readPayloadIndex + 1.U + when(channel.RLAST) { + cam(rPtr).valid := false.B + rPtr := rPtr + 1.U + } + } + } + } +} diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index 602e579d9..ac17980f6 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -3,17 +3,20 @@ package org.chipsalliance.t1.t1rocketemu +import chisel3._ import 
chisel3.experimental.{BaseModule, ExtModule, SerializableModuleGenerator} +import chisel3.experimental.dataview.DataViewable +import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall import chisel3.util.HasExtModuleInline -import chisel3.{Bool, ImplicitClock, ImplicitReset, Module, Output, RawModule} +import org.chipsalliance.amba.axi4.bundle._ +import org.chipsalliance.t1.t1rocketemu.dpi._ import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTileParameter]) - extends RawModule + extends RawModule with ImplicitClock with ImplicitReset { val clockGen = Module(new ExtModule with HasExtModuleInline { - override def desiredName = "ClockGen" setInline( s"$desiredName.sv", @@ -31,9 +34,9 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil |`endif | endfunction; | - | import "DPI-C" context function void t1_cosim_init(); + | import "DPI-C" context function void cosim_init(); | initial begin - | t1_cosim_init(); + | cosim_init(); | clock = 1'b0; | reset = 1'b1; | end @@ -52,25 +55,95 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil val dut: T1RocketTile with BaseModule = Module(generator.module()) dut.io.clock := clock dut.io.reset := reset - dut.io.hartid - dut.io.resetVector - dut.io.debug - dut.io.mtip - dut.io.msip - dut.io.meip - dut.io.seip - dut.io.lip - dut.io.nmi - dut.io.nmiInterruptVector - dut.io.nmiIxceptionVector - dut.io.buserror - dut.io.wfi - dut.io.halt - dut.io.instructionFetchAXI - dut.io.itimAXI - dut.io.loadStoreAXI - dut.io.dtimAXI - dut.io.dtimAXI - dut.io.highBandwidthAXI - dut.io.highOutstandingAXI -} \ No newline at end of file + + val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + simulationTime := simulationTime + 1.U + + // get resetVector from simulator + dut.io.resetVector := RawUnclockedNonVoidFunctionCall("get_resetvector", 
Const(UInt(64.W)))(simulationTime === 0.U) + + dut.io.hartid := 0.U + dut.io.debug := 0.U + dut.io.mtip := 0.U + dut.io.msip := 0.U + dut.io.meip := 0.U + dut.io.buserror := 0.U + dut.io.lip := 0.U + dut.io.wfi := 0.U + dut.io.halt := 0.U + + // memory driver + Seq( + dut.io.highBandwidthAXI, // index 0 + dut.io.highOutstandingAXI // index 1 + ).map(_.viewAs[AXI4RWIrrevocableVerilog]) + .lazyZip( + Seq("highBandwidthAXI", "highOutstandingAXI") + ) + .zipWithIndex + .foreach { + case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => + val agent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = channelName, + axiParameter = bundle.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ) + ).suggestName(s"axi4_channel${index}_${channelName}") + agent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> bundle + } + agent.io.clock := clock + agent.io.reset := reset + agent.io.channelId := index.U + agent.io.gateRead := false.B + agent.io.gateWrite := false.B + } + + val instFetchAXI = dut.io.instructionFetchAXI.viewAs[AXI4ROIrrevocableVerilog] + val instFetchAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "instructionFetchAXI", + axiParameter = instFetchAXI.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ).suggestName("axi4_channel2_instructionFetchAXI") + ) + instFetchAgent.io.channel match { + case io: AXI4ROIrrevocableVerilog => io <> instFetchAXI + } + instFetchAgent.io.clock := clock + instFetchAgent.io.reset := reset + instFetchAgent.io.channelId := 0.U + instFetchAgent.io.gateRead := false.B + instFetchAgent.io.gateWrite := false.B + + val loadStoreAXI = dut.io.loadStoreAXI.viewAs[AXI4RWIrrevocableVerilog] + val loadStoreAgent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = "loadStoreAXI", + axiParameter = loadStoreAXI.parameter, + outstanding = 4, + // TODO: add payloadSize config to parameter + 
readPayloadSize = 8, // todo: align with parameter in the future + writePayloadSize = 8 + ) + ).suggestName("axi4_channel3_loadStoreAXI") + ) + loadStoreAgent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> loadStoreAXI + } + loadStoreAgent.io.clock := clock + loadStoreAgent.io.reset := reset + loadStoreAgent.io.channelId := 3.U + loadStoreAgent.io.gateRead := false.B + loadStoreAgent.io.gateWrite := false.B +} From d3239e018b762d84ec79985b1a4afb3c6c6ff361 Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Sun, 4 Aug 2024 15:05:01 +0800 Subject: [PATCH 126/140] [t1rocket] migrate difftest framework from t1 and update dpi from rocket [t1rocket] refactor load_from_payload function to improve readability and performance [t1rocket] add elf crate dependency --- t1rocketemu/.clang-format | 236 ++ t1rocketemu/.gitignore | 1 + t1rocketemu/.rustfmt.toml | 4 + t1rocketemu/Cargo.lock | 664 ++++++ t1rocketemu/Cargo.toml | 25 + t1rocketemu/offline/Cargo.toml | 19 + t1rocketemu/offline/src/difftest.rs | 90 + t1rocketemu/offline/src/dut.rs | 48 + t1rocketemu/offline/src/json_events.rs | 409 ++++ t1rocketemu/offline/src/main.rs | 57 + t1rocketemu/online_dpi/Cargo.toml | 20 + t1rocketemu/online_dpi/src/dpi.rs | 321 +++ t1rocketemu/online_dpi/src/drive.rs | 337 +++ t1rocketemu/online_dpi/src/lib.rs | 44 + t1rocketemu/online_dpi/src/svdpi.rs | 50 + t1rocketemu/online_dpi/src/svdpi/sys.rs | 750 ++++++ t1rocketemu/online_dpi/src/svvpi.rs | 18 + t1rocketemu/online_dpi/src/svvpi/sys.rs | 2102 +++++++++++++++++ t1rocketemu/online_drive/Cargo.toml | 13 + t1rocketemu/online_drive/build.rs | 21 + t1rocketemu/online_drive/src/main.rs | 31 + .../verilator_shim/CMakeLists.txt | 38 + .../verilator_shim/verilator_shim.cc | 40 + t1rocketemu/online_vcs/Cargo.toml | 14 + t1rocketemu/online_vcs/default.nix | 48 + t1rocketemu/online_vcs/src/lib.rs | 2 + t1rocketemu/readme.md | 11 + t1rocketemu/spike_interfaces/CMakeLists.txt | 32 + t1rocketemu/spike_interfaces/default.nix | 11 + 
.../spike_interfaces-config.cmake | 3 + .../spike_interfaces/spike_interfaces.cc | 252 ++ .../spike_interfaces/spike_interfaces.h | 76 + .../spike_interfaces/spike_interfaces_c.h | 65 + t1rocketemu/spike_rs/Cargo.toml | 10 + t1rocketemu/spike_rs/build.rs | 18 + t1rocketemu/spike_rs/src/lib.rs | 287 +++ t1rocketemu/spike_rs/src/spike_event.rs | 523 ++++ t1rocketemu/spike_rs/src/util.rs | 65 + t1rocketemu/test_common/Cargo.toml | 11 + t1rocketemu/test_common/src/lib.rs | 63 + t1rocketemu/test_common/src/rtl_config.rs | 20 + t1rocketemu/test_common/src/spike_runner.rs | 141 ++ t1rocketemu/vcs.nix | 0 t1rocketemu/verilator.nix | 90 + 44 files changed, 7080 insertions(+) create mode 100644 t1rocketemu/.clang-format create mode 100644 t1rocketemu/.gitignore create mode 100644 t1rocketemu/.rustfmt.toml create mode 100644 t1rocketemu/Cargo.lock create mode 100644 t1rocketemu/Cargo.toml create mode 100644 t1rocketemu/offline/Cargo.toml create mode 100644 t1rocketemu/offline/src/difftest.rs create mode 100644 t1rocketemu/offline/src/dut.rs create mode 100644 t1rocketemu/offline/src/json_events.rs create mode 100644 t1rocketemu/offline/src/main.rs create mode 100644 t1rocketemu/online_dpi/Cargo.toml create mode 100644 t1rocketemu/online_dpi/src/dpi.rs create mode 100644 t1rocketemu/online_dpi/src/drive.rs create mode 100644 t1rocketemu/online_dpi/src/lib.rs create mode 100644 t1rocketemu/online_dpi/src/svdpi.rs create mode 100644 t1rocketemu/online_dpi/src/svdpi/sys.rs create mode 100644 t1rocketemu/online_dpi/src/svvpi.rs create mode 100644 t1rocketemu/online_dpi/src/svvpi/sys.rs create mode 100644 t1rocketemu/online_drive/Cargo.toml create mode 100644 t1rocketemu/online_drive/build.rs create mode 100644 t1rocketemu/online_drive/src/main.rs create mode 100644 t1rocketemu/online_drive/verilator_shim/CMakeLists.txt create mode 100644 t1rocketemu/online_drive/verilator_shim/verilator_shim.cc create mode 100644 t1rocketemu/online_vcs/Cargo.toml create mode 100644 
t1rocketemu/online_vcs/default.nix create mode 100644 t1rocketemu/online_vcs/src/lib.rs create mode 100644 t1rocketemu/readme.md create mode 100644 t1rocketemu/spike_interfaces/CMakeLists.txt create mode 100644 t1rocketemu/spike_interfaces/default.nix create mode 100644 t1rocketemu/spike_interfaces/spike_interfaces-config.cmake create mode 100644 t1rocketemu/spike_interfaces/spike_interfaces.cc create mode 100644 t1rocketemu/spike_interfaces/spike_interfaces.h create mode 100644 t1rocketemu/spike_interfaces/spike_interfaces_c.h create mode 100644 t1rocketemu/spike_rs/Cargo.toml create mode 100644 t1rocketemu/spike_rs/build.rs create mode 100644 t1rocketemu/spike_rs/src/lib.rs create mode 100644 t1rocketemu/spike_rs/src/spike_event.rs create mode 100644 t1rocketemu/spike_rs/src/util.rs create mode 100644 t1rocketemu/test_common/Cargo.toml create mode 100644 t1rocketemu/test_common/src/lib.rs create mode 100644 t1rocketemu/test_common/src/rtl_config.rs create mode 100644 t1rocketemu/test_common/src/spike_runner.rs create mode 100644 t1rocketemu/vcs.nix create mode 100644 t1rocketemu/verilator.nix diff --git a/t1rocketemu/.clang-format b/t1rocketemu/.clang-format new file mode 100644 index 000000000..57d55c245 --- /dev/null +++ b/t1rocketemu/.clang-format @@ -0,0 +1,236 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false 
+AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock 
+ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine 
+RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... 
+ diff --git a/t1rocketemu/.gitignore b/t1rocketemu/.gitignore new file mode 100644 index 000000000..9f970225a --- /dev/null +++ b/t1rocketemu/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/t1rocketemu/.rustfmt.toml b/t1rocketemu/.rustfmt.toml new file mode 100644 index 000000000..7b6c82e24 --- /dev/null +++ b/t1rocketemu/.rustfmt.toml @@ -0,0 +1,4 @@ +hard_tabs = false +tab_spaces = 2 +chain_width = 100 +struct_lit_width = 50 \ No newline at end of file diff --git a/t1rocketemu/Cargo.lock b/t1rocketemu/Cargo.lock new file mode 100644 index 000000000..9e6740f4a --- /dev/null +++ b/t1rocketemu/Cargo.lock @@ -0,0 +1,664 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies 
= [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "cc" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b3edb18336f4df585bc9aa31dd99c036dfa5dc5e9a2939a722a188f3a8970d" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c09dd5ada6c6c78075d6fd0da3f90d8080651e2d6cc8eb2f1aaa4034ced708" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "spike_rs", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "offline" +version = "0.1.0" 
+dependencies = [ + "anyhow", + "clap", + "common", + "libloading", + "num-bigint", + "serde", + "serde_json", + "spike_rs", + "tracing", + "tracing-subscriber", + "xmas-elf", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "online_dpi" +version = "0.1.0" +dependencies = [ + "clap", + "common", + "elf", + "hex", + "spike_rs", + "tracing", +] + +[[package]] +name = "online_drive" +version = "0.1.0" +dependencies = [ + "cmake", + "online_dpi", +] + +[[package]] +name = "online_vcs" +version = "0.1.0" +dependencies = [ + "online_dpi", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.7", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.4", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spike_rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "libc", + "tracing", + "xmas-elf", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "xmas-elf" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c49817e78342f7f30a181573d82ff55b88a35f86ccaf07fc64b3008f56d1c6" +dependencies = [ + "zero", +] + +[[package]] +name = "zero" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fe21bcc34ca7fe6dd56cc2cb1261ea59d6b93620215aefb5ea6032265527784" diff --git a/t1rocketemu/Cargo.toml b/t1rocketemu/Cargo.toml new file mode 100644 index 000000000..b4488928c --- /dev/null +++ b/t1rocketemu/Cargo.toml @@ -0,0 +1,25 @@ +[workspace] +resolver = "2" +members = [ + "test_common", + "spike_rs", + "offline", + "online_dpi", + "online_drive", + "online_vcs", +] +exclude = [ + "spike_interfaces" +] + +[workspace.package] +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive"] } +tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "ansi"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +num-bigint = "0.4.6" diff --git a/t1rocketemu/offline/Cargo.toml b/t1rocketemu/offline/Cargo.toml new file mode 100644 index 000000000..2824a161e --- /dev/null +++ b/t1rocketemu/offline/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "offline" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +num-bigint = { workspace = true } + +libloading = "0.8.1" +xmas-elf = 
"0.9.1" + +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } diff --git a/t1rocketemu/offline/src/difftest.rs b/t1rocketemu/offline/src/difftest.rs new file mode 100644 index 000000000..66a0173d9 --- /dev/null +++ b/t1rocketemu/offline/src/difftest.rs @@ -0,0 +1,90 @@ +use common::spike_runner::SpikeRunner; +use std::path::Path; +use tracing::info; + +use common::rtl_config::RTLConfig; +use common::CommonArgs; + +use crate::dut::Dut; +use crate::json_events::*; + +pub struct Difftest { + runner: SpikeRunner, + dut: Dut, + + #[allow(dead_code)] + config: RTLConfig, +} + +impl Difftest { + pub fn new(args: CommonArgs) -> Self { + let config = RTLConfig { vlen: args.vlen, dlen: args.dlen }; + Self { + runner: SpikeRunner::new(&args, true), + dut: Dut::new(Path::new( + &args.log_file.expect("difftest must be run with a log file"), + )), + config, + } + } + + pub fn diff(&mut self) -> anyhow::Result<()> { + self.runner.check_and_clear_fence(); + + let event = self.dut.step()?; + + match event { + JsonEvents::SimulationStart { cycle } => { + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::SimulationStop { reason, cycle } => { + info!("simulation stopped at cycle {}, reason {}", cycle, reason); + self.runner.cycle = *cycle; + Ok(()) + } + JsonEvents::Issue { idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_issue(&IssueEvent { idx: *idx, cycle: *cycle }) + } + JsonEvents::MemoryWrite { mask, data, lsu_idx, address, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_memory_write(&MemoryWriteEvent { + mask: mask.clone(), + data: data.clone(), + lsu_idx: *lsu_idx, + address: *address, + cycle: *cycle, + }) + } + JsonEvents::LsuEnq { enq, cycle } => { + self.runner.cycle = *cycle; + self.runner.update_lsu_idx(&LsuEnqEvent { enq: *enq, cycle: *cycle }) + } + JsonEvents::VrfWrite { issue_idx, vd, offset, mask, data, lane, cycle } => { + self.runner.cycle = *cycle; + self.runner.peek_vrf_write(&VrfWriteEvent { + 
issue_idx: *issue_idx, + vd: *vd, + offset: *offset, + mask: mask.clone(), + data: data.clone(), + lane: *lane, + cycle: *cycle, + }) + } + JsonEvents::CheckRd { data, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.check_rd(&CheckRdEvent { data: *data, issue_idx: *issue_idx, cycle: *cycle }) + } + JsonEvents::VrfScoreboardReport { count, issue_idx, cycle } => { + self.runner.cycle = *cycle; + self.runner.vrf_scoreboard_report(&VrfScoreboardReportEvent { + count: *count, + issue_idx: *issue_idx, + cycle: *cycle, + }) + } + } + } +} diff --git a/t1rocketemu/offline/src/dut.rs b/t1rocketemu/offline/src/dut.rs new file mode 100644 index 000000000..a4cc80821 --- /dev/null +++ b/t1rocketemu/offline/src/dut.rs @@ -0,0 +1,48 @@ +use anyhow::Context; +use std::io::BufRead; +use std::path::Path; + +use crate::json_events::JsonEvents; + +#[derive(Debug)] +pub struct Dut { + events: Vec, + idx: u32, +} + +impl Dut { + fn read_json(path: &Path) -> anyhow::Result> { + let file = std::fs::File::open(path).unwrap(); + let reader = std::io::BufReader::new(file); + + let mut events = Vec::new(); + + for (i, line) in reader.lines().enumerate() { + let line = line.expect("line read error"); + if line.starts_with("{") { + // ignore illegal lines + let event: JsonEvents = serde_json::from_str(&line) + .with_context(|| format!("parsing {} line {}", path.display(), i + 1))?; + events.push(event); + } + } + + Ok(events) + } + + pub fn new(path: &Path) -> Self { + let events = Self::read_json(path).unwrap(); + let idx = 0; + Self { events, idx } + } + + pub fn step(&mut self) -> anyhow::Result<&JsonEvents> { + let event = match self.events.get(self.idx as usize) { + Some(event) => event, + None => return Err(anyhow::anyhow!("no more events")), + }; + self.idx += 1; + + Ok(event) + } +} diff --git a/t1rocketemu/offline/src/json_events.rs b/t1rocketemu/offline/src/json_events.rs new file mode 100644 index 000000000..24652f04d --- /dev/null +++ 
b/t1rocketemu/offline/src/json_events.rs @@ -0,0 +1,409 @@ +use common::spike_runner::SpikeRunner; +use num_bigint::BigUint; +use serde::{Deserialize, Deserializer}; +use spike_rs::spike_event::LSU_IDX_DEFAULT; +use tracing::{debug, info}; + +#[derive(Deserialize, Debug, PartialEq, Clone)] +pub enum Opcode { + PutFullData = 0, + PutPartialData = 1, + Get = 4, + // AccessAckData = 0, + // AccessAck = 0, +} + +fn bigint_to_vec_u8<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + Ok(bigint.to_bytes_le()) +} + +fn bigint_to_vec_bool<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let bigint = BigUint::parse_bytes(s.trim_start().as_bytes(), 16) + .ok_or_else(|| serde::de::Error::custom("Failed to parse BigUint from hex string"))?; + let bytes = bigint.to_bytes_le(); + let bools = bytes.iter().flat_map(|byte| (0..8).map(move |i| (byte >> i) & 1u8 == 1u8)).collect(); + + Ok(bools) +} + +fn hex_to_u32<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let value = + u32::from_str_radix(s.trim_start_matches(' '), 16).map_err(serde::de::Error::custom)?; + + Ok(value) +} + +fn mask_display(mask: &Vec) -> String { + mask.into_iter().map(|&b| if b { '1' } else { '0' }).collect() +} + +#[derive(Deserialize, Debug)] +#[serde(tag = "event")] +pub(crate) enum JsonEvents { + SimulationStart { + cycle: u64, + }, + SimulationStop { + reason: u8, + cycle: u64, + }, + Issue { + idx: u8, + cycle: u64, + }, + LsuEnq { + enq: u32, + cycle: u64, + }, + VrfWrite { + issue_idx: u8, + vd: u32, + offset: u32, + #[serde(deserialize_with = "bigint_to_vec_bool", default)] + mask: Vec, + 
#[serde(deserialize_with = "bigint_to_vec_u8", default)] + data: Vec, + lane: u32, + cycle: u64, + }, + MemoryWrite { + #[serde(deserialize_with = "bigint_to_vec_bool", default)] + mask: Vec, + #[serde(deserialize_with = "bigint_to_vec_u8", default)] + data: Vec, + lsu_idx: u8, + #[serde(deserialize_with = "hex_to_u32", default)] + address: u32, + cycle: u64, + }, + CheckRd { + #[serde(deserialize_with = "hex_to_u32", default)] + data: u32, + issue_idx: u8, + cycle: u64, + }, + VrfScoreboardReport { + count: u32, + issue_idx: u8, + cycle: u64, + }, +} + +pub struct IssueEvent { + pub idx: u8, + pub cycle: u64, +} + +pub struct LsuEnqEvent { + pub enq: u32, + pub cycle: u64, +} + +pub struct VrfWriteEvent { + pub lane: u32, + pub vd: u32, + pub offset: u32, + pub mask: Vec, + pub data: Vec, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct MemoryWriteEvent { + pub mask: Vec, + pub data: Vec, + pub lsu_idx: u8, + pub address: u32, + pub cycle: u64, +} + +pub struct VrfScoreboardReportEvent { + pub count: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub struct CheckRdEvent { + pub data: u32, + pub issue_idx: u8, + pub cycle: u64, +} + +pub(crate) trait JsonEventRunner { + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()>; + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()>; + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()>; + + fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()>; + + fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()>; + + fn check_and_clear_fence(&mut self); + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()>; + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()>; +} + +impl JsonEventRunner for SpikeRunner { + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()> { + self.find_v_se_to_issue(); // ensure the front of queue is a new 
un-issued se + let se = self.commit_queue.front_mut().unwrap(); + if se.is_vfence() { + return Ok(()); + } + + se.issue_idx = issue.idx as u8; + + info!( + "[{}] SpikePeekIssue: issue_idx={}, pc={:#x}, inst={}", + issue.cycle, issue.idx, se.pc, se.disasm + ); + + Ok(()) + } + + fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()> { + let enq = lsu_enq.enq; + assert!(enq > 0, "enq should be greater than 0"); + let cycle = lsu_enq.cycle; + + if let Some(se) = self + .commit_queue + .iter_mut() + .rev() + .find(|se| (se.is_vload() || se.is_vstore()) && se.lsu_idx == LSU_IDX_DEFAULT) + { + let index = enq.trailing_zeros() as u8; + se.lsu_idx = index; + info!( + "[{cycle}] UpdateLSUIdx: instr ({}) is allocated with lsu_idx: {index}", + se.describe_insn() + ); + } + Ok(()) + } + + fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()> { + let cycle = vrf_write.cycle; + let vlen_in_bytes = self.vlen / 8; + let lane_number = self.dlen / 32; + let record_idx_base = (vrf_write.vd * vlen_in_bytes + + (vrf_write.lane + lane_number * vrf_write.offset) * 4) as usize; + + let mut retire_issue: Option = None; + + if let Some(se) = + self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == vrf_write.issue_idx) + { + debug!( + "[{}] VrfWrite: lane={}, vd={}, idx_base={}, issue_idx={}, offset={}, mask={}, data={:x?} ({})", + vrf_write.cycle, + vrf_write.lane, + vrf_write.vd, + record_idx_base, + vrf_write.issue_idx, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, + se.describe_insn() + ); + + if let Some(unretired_writes) = se.vrf_access_record.unretired_writes { + assert!( + unretired_writes > 0, + "[{}] unretired_writes should be greater than 0, issue_idx={} ({})", + vrf_write.cycle, + vrf_write.issue_idx, + se.describe_insn() + ); + if unretired_writes == 1 { + retire_issue = Some(vrf_write.issue_idx); + } + se.vrf_access_record.unretired_writes = Some(unretired_writes - 1); + } else { + 
se.vrf_access_record.retired_writes += 1; + } + + vrf_write.mask.iter().enumerate().filter(|(_, &mask)| mask).for_each(|(offset, _)| { + let written_byte = *vrf_write.data.get(offset).unwrap_or(&0); + + if let Some(record) = se.vrf_access_record.all_writes.get_mut(&(record_idx_base + offset)) { + assert_eq!( + record.byte, + written_byte, + "[{}] {offset}th byte incorrect ({:02x} record != {written_byte:02x} written) \ + for vrf write (lane={}, vd={}, offset={}, mask={}, data={:x?}) \ + issue_idx={} [vrf_idx={}] (disasm: {}, pc: {:#x}, bits: {:#x})", + vrf_write.cycle, + record.byte, + vrf_write.lane, + vrf_write.vd, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data, + se.issue_idx, + record_idx_base + offset, + se.disasm, + se.pc, + se.inst_bits + ); + record.executed = true; + } else { + debug!( + "[{}] cannot find vrf write record, maybe not changed (lane={}, vd={}, idx={}, offset={}, mask={}, data={:x?})", + vrf_write.cycle, + vrf_write.lane, + vrf_write.vd, + record_idx_base + offset, + vrf_write.offset, + mask_display(&vrf_write.mask), + vrf_write.data + ); + } + }) + } else { + info!( + "[{cycle}] RecordRFAccess: rtl detect vrf write on lane={}, vd={} \ + with no matched se (issue_idx={}), \ + maybe from committed load insn", + vrf_write.lane, vrf_write.vd, vrf_write.issue_idx + ); + } + + if let Some(issue_idx) = retire_issue { + self.retire(cycle, issue_idx).unwrap(); + } + + Ok(()) + } + + fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()> { + let data = memory_write.data.to_owned(); + let mask = memory_write.mask.to_owned(); + let cycle = memory_write.cycle; + let base_addr = memory_write.address; + let lsu_idx = memory_write.lsu_idx; + + if let Some(se) = self.commit_queue.iter_mut().find(|se| se.lsu_idx == lsu_idx) { + info!("[{cycle}] MemoryWrite: address={base_addr:08x}, size={}, data={data:x?}, mask={}, pc = {:#x}, disasm = {}", data.len(), mask_display(&mask), se.pc, se.disasm); + // compare 
with spike event record + mask.iter().enumerate() + .filter(|(_, &mask)| mask) + .for_each(|(offset, _)| { + let byte_addr = base_addr + offset as u32; + let data_byte = *data.get(offset).unwrap_or(&0); + let mem_write = + se.mem_access_record.all_writes.get_mut(&byte_addr).unwrap_or_else(|| { + panic!("[{cycle}] cannot find mem write of byte_addr {byte_addr:08x}") + }); + let single_mem_write_val = mem_write.writes[mem_write.num_completed_writes].val; + mem_write.num_completed_writes += 1; + assert_eq!(single_mem_write_val, data_byte, "[{cycle}] expect mem write of byte {single_mem_write_val:02X}, actual byte {data_byte:02X} (byte_addr={byte_addr:08X}, pc = {:#x}, disasm = {})", se.pc, se.disasm); + }); + return Ok(()); + } + + panic!("[{cycle}] cannot find se with instruction lsu_idx={lsu_idx}") + } + + fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()> { + let count = report.count; + let issue_idx = report.issue_idx; + let cycle = report.cycle; + + let mut should_retire: Option = None; + + if let Some(se) = self.commit_queue.iter_mut().rev().find(|se| se.issue_idx == issue_idx) { + assert!( + se.vrf_access_record.retired_writes <= count, + "[{cycle}] retired_writes({}) should be less than count({count}), issue_idx={issue_idx} ({})", + se.vrf_access_record.retired_writes, se.describe_insn() + ); + + // if instruction writes rd, it will retire in check_rd() + if count == se.vrf_access_record.retired_writes && !se.is_rd_written { + should_retire = Some(issue_idx); + } + // if all writes are committed, retire the se + se.vrf_access_record.unretired_writes = Some(count - se.vrf_access_record.retired_writes); + + info!( + "[{cycle}] VrfScoreboardReport: count={count}, issue_idx={issue_idx}, retired={} ({})", + se.vrf_access_record.retired_writes, + se.describe_insn() + ); + } else { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}"); + } + + if let Some(issue_idx) = should_retire { + self.retire(cycle, 
issue_idx).unwrap(); + } + + Ok(()) + } + + /// after update, if instructions before fence are cleared, fence is also cleared + fn check_and_clear_fence(&mut self) { + if !self.commit_queue.is_empty() { + let se = self.commit_queue.back().unwrap(); + + if se.is_vfence() && self.commit_queue.len() == 1 { + self.commit_queue.pop_back(); + } + } + } + + fn check_rd(&mut self, check_rd: &CheckRdEvent) -> anyhow::Result<()> { + let data = check_rd.data; + let cycle = check_rd.cycle; + let issue_idx = check_rd.issue_idx; + + let se = + self.commit_queue.iter_mut().find(|se| se.issue_idx == issue_idx).unwrap_or_else(|| { + panic!("[{cycle}] cannot find se with instruction issue_idx={issue_idx}") + }); + + info!("[{cycle}] CheckRd: issue_idx={issue_idx}, data={data:x?}"); + + se.check_rd(data).expect("Failed to check_rd"); + + self.retire(cycle, issue_idx).unwrap(); + + Ok(()) + } + + fn retire(&mut self, cycle: u64, issue_idx: u8) -> anyhow::Result<()> { + if let Some(idx) = self.commit_queue.iter().position(|se| se.issue_idx == issue_idx) { + if let Some(se) = self.commit_queue.remove(idx) { + info!( + "[{cycle}] Retire: retire se with issue_idx={issue_idx}, ({})", + se.describe_insn() + ); + se.check_is_ready_for_commit(cycle).unwrap(); + } else { + panic!("[{cycle}] Retire: cannot remove se with instruction issue_idx={issue_idx}") + } + } else { + panic!("[{cycle}] Retire: cannot find se with instruction issue_idx={issue_idx}") + } + Ok(()) + } +} diff --git a/t1rocketemu/offline/src/main.rs b/t1rocketemu/offline/src/main.rs new file mode 100644 index 000000000..0328e2cf3 --- /dev/null +++ b/t1rocketemu/offline/src/main.rs @@ -0,0 +1,57 @@ +mod difftest; +mod dut; +mod json_events; + +use clap::Parser; +use tracing::info; + +use common::spike_runner::SpikeRunner; +use common::CommonArgs; + +use crate::difftest::Difftest; + +fn run_spike(args: &CommonArgs) -> anyhow::Result<()> { + let mut count: u64 = 0; + + let spike = SpikeRunner::new(args, true); + loop { + count += 
1; + if count % 1000000 == 0 { + info!("count = {}", count); + } + match spike.exec() { + Ok(_) => {} + Err(_) => { + info!("total v instructions count = {}", count); + info!("Simulation quit gracefully"); + return Ok(()); + } + }; + } +} + +fn main() -> anyhow::Result<()> { + // parse args + let args = CommonArgs::parse(); + + args.setup_logger()?; + + // if there is no log file, just run spike and quit + if args.log_file.is_none() { + run_spike(&args)?; + return Ok(()); + } + + // if there is a log file, run difftest + let mut diff = Difftest::new(args); + + loop { + match diff.diff() { + Ok(_) => {} + Err(e) => { + info!("Simulation quit/error with {}", e); + return Ok(()); + } + } + } +} diff --git a/t1rocketemu/online_dpi/Cargo.toml b/t1rocketemu/online_dpi/Cargo.toml new file mode 100644 index 000000000..6a6db720a --- /dev/null +++ b/t1rocketemu/online_dpi/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "online_dpi" +edition = "2021" +version.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +common = { path = "../test_common" } +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } +tracing = { workspace = true } + +elf = "0.7.4" +hex = "0.4.3" + +[features] +sv2023 = [] +svvpi = [] +trace = [] diff --git a/t1rocketemu/online_dpi/src/dpi.rs b/t1rocketemu/online_dpi/src/dpi.rs new file mode 100644 index 000000000..b977b690d --- /dev/null +++ b/t1rocketemu/online_dpi/src/dpi.rs @@ -0,0 +1,321 @@ +#![allow(non_snake_case)] +#![allow(unused_variables)] + +use clap::Parser; +use std::ffi::{c_char, c_longlong, CString}; +use std::sync::Mutex; +use tracing::debug; + +use crate::drive::Driver; +use crate::svdpi::SvScope; +use crate::OfflineArgs; + +pub type SvBitVecVal = u32; + +// -------------------------- +// preparing data structures +// -------------------------- + +static DPI_TARGET: Mutex>> = Mutex::new(None); + +pub(crate) struct AxiReadPayload { + pub(crate) data: 
Vec, +} + +unsafe fn write_to_pointer(dst: *mut u8, data: &[u8]) { + let dst = std::slice::from_raw_parts_mut(dst, data.len()); + dst.copy_from_slice(data); +} + +unsafe fn fill_axi_read_payload(dst: *mut SvBitVecVal, dlen: u32, payload: &AxiReadPayload) { + let data_len = 256 * (dlen / 8) as usize; + assert!(payload.data.len() <= data_len); + write_to_pointer(dst as *mut u8, &payload.data); +} + +// Return (strobe in bit, data in byte) +unsafe fn load_from_payload( + payload: &*const SvBitVecVal, + data_width: usize, + dlen: usize, +) -> (&[u8], &[u8]) { + let src = *payload as *mut u8; + let data_width_in_byte = dlen / 8; + let strb_width_in_byte = dlen / data_width; + let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // data width in byte + let byte_vec = std::slice::from_raw_parts(src, payload_size_in_byte); + let strobe = &byte_vec[0..strb_width_in_byte]; + let data = &byte_vec[strb_width_in_byte..]; + + let strb_width_in_bit = data_width / 8; + let masks: Vec = strobe + .into_iter() + .flat_map(|strb| { + let mask: Vec = (0..strb_width_in_bit).map(|i| (strb & (1 << i)) != 0).collect(); + mask + }) + .collect(); + assert!( + masks.len() == data.len(), + "strobe bit width is not aligned with data byte width" + ); + + debug!( + "load {payload_size_in_byte} byte from payload: raw_data={} strb={} data={}", + hex::encode(byte_vec), + hex::encode(strobe), + hex::encode(data), + ); + + (&masks, data) +} + +//---------------------- +// dpi functions +//---------------------- + +/// evaluate after AW and W is finished at corresponding channel_id. 
+#[no_mangle] +unsafe extern "C" fn axi_write_highBandwidthAXI( + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; + // bit [255:0][DLEN/8:0] strb; } payload + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_highBandwidth (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from driver + let (strobe, data) = load_from_payload(&payload, 32, driver.dlen); + driver.axi_write_high_bandwidth(awaddr as u32, awsize as u64, strobe, data); +} + +/// evaluate at AR fire at corresponding channel_id. +#[no_mangle] +unsafe extern "C" fn axi_read_highBandwidthAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_highBandwidth (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_high_bandwidth(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +/// evaluate after AW and W is finished at corresponding channel_id. 
+#[no_mangle] +unsafe extern "C" fn axi_write_indexedAccessAXI( + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + // struct packed {bit [255:0][31:0] data; bit [255:0][3:0] strb; } payload + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_indexed (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from driver + let (strobe, data) = load_from_payload(&payload, data_width, 32); + driver.axi_write_indexed_access(awaddr as u32, awsize as u64, strobe, data); +} + +/// evaluate at AR fire at corresponding channel_id. 
+#[no_mangle] +unsafe extern "C" fn axi_read_indexedAccessAXI( + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + // struct packed {bit [255:0][DLEN:0] data; byte beats; } payload + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_indexed (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_indexed(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response); +} + +#[no_mangle] +unsafe extern "C" fn axi_write_loadStoreAXI( + target: *mut (), + channel_id: c_longlong, + awid: c_longlong, + awaddr: c_longlong, + awlen: c_longlong, + awsize: c_longlong, + awburst: c_longlong, + awlock: c_longlong, + awcache: c_longlong, + awprot: c_longlong, + awqos: c_longlong, + awregion: c_longlong, + payload: *const SvBitVecVal, +) { + debug!( + "axi_write_loadStore (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + awlen={awlen}, awsize=2^{awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awprot={awprot}, awqos={awqos}, awregion={awregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let data_width = 32; // TODO: get from sim + let (strobe, data) = load_from_payload(&payload, data_width, driver.dlen as usize); + driver.axi_write_load_store(awaddr as u32, awsize, strobe, data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_loadStoreAXI( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: 
c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_loadStoreAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_load_store(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn axi_read_instructionFetchAXI( + target: *mut (), + channel_id: c_longlong, + arid: c_longlong, + araddr: c_longlong, + arlen: c_longlong, + arsize: c_longlong, + arburst: c_longlong, + arlock: c_longlong, + arcache: c_longlong, + arprot: c_longlong, + arqos: c_longlong, + arregion: c_longlong, + payload: *mut SvBitVecVal, +) { + debug!( + "axi_read_instructionFetchAXI (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ + arprot={arprot}, arqos={arqos}, arregion={arregion})" + ); + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + let response = driver.axi_read_instruction_fetch(araddr as u32, arsize as u64); + fill_axi_read_payload(payload, driver.dlen, &response.data); +} + +#[no_mangle] +unsafe extern "C" fn cosim_init() { + let args = OfflineArgs::parse(); + args.common_args.setup_logger().unwrap(); + + let scope = SvScope::get_current().expect("failed to get scope in cosim_init"); + + let driver = Box::new(Driver::new(scope, &args)); + let mut dpi_target = DPI_TARGET.lock().unwrap(); + assert!( + dpi_target.is_none(), + "cosim_init should be called only once" + ); + *dpi_target = Some(driver); +} + +#[no_mangle] +unsafe extern "C" fn get_resetvector(target: *mut (), resetvector: *mut 
c_longlong) { + if !target.is_null() { + let mut driver = DPI_TARGET.lock().unwrap(); + let driver = driver.as_mut().unwrap(); + *resetvector = driver.e_entry as c_longlong + } +} + +//-------------------------------- +// import functions and wrappers +//-------------------------------- + +mod dpi_export { + use std::ffi::c_char; + extern "C" { + #[cfg(feature = "trace")] + /// `export "DPI-C" function dump_wave(input string file)` + pub fn dump_wave(path: *const c_char); + } +} + +#[cfg(feature = "trace")] +pub(crate) fn dump_wave(scope: crate::svdpi::SvScope, path: &str) { + use crate::svdpi; + let path_cstring = CString::new(path).unwrap(); + + svdpi::set_scope(scope); + unsafe { + dpi_export::dump_wave(path_cstring.as_ptr()); + } +} diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs new file mode 100644 index 000000000..2b996eecf --- /dev/null +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -0,0 +1,337 @@ +use common::MEM_SIZE; +use elf::{ + abi::{EM_RISCV, ET_EXEC, PT_LOAD, STT_FUNC}, + endian::LittleEndian, + ElfStream, +}; +use spike_rs::util::load_elf_to_buffer; +use tracing::{debug, error, info, trace}; + +use crate::dpi::*; +use crate::get_t; +use crate::svdpi::SvScope; +use crate::OfflineArgs; + +struct ShadowMem { + mem: Vec, +} + +impl ShadowMem { + pub fn new() -> Self { + Self { mem: vec![0; MEM_SIZE] } + } + + pub fn read_mem(&self, addr: u32, size: u32) -> &[u8] { + let start = addr as usize; + let end = (addr + size) as usize; + &self.mem[start..end] + } + + // size: 1 << arsize + // bus_size: AXI bus width in bytes + // return: Vec with len=bus_size + // if size < bus_size, the result is padded due to AXI narrow transfer rules + pub fn read_mem_axi(&self, addr: u32, size: u32, bus_size: u32) -> Vec { + assert!( + addr % size == 0 && bus_size % size == 0, + "unaligned access addr={addr:#x} size={size}B dlen={bus_size}B" + ); + + let data = self.read_mem(addr, size); + if size < bus_size { + // narrow + let mut 
data_padded = vec![0; bus_size as usize]; + let start = (addr % bus_size) as usize; + let end = start + data.len(); + data_padded[start..end].copy_from_slice(data); + + data_padded + } else { + // normal + data.to_vec() + } + } + + // size: 1 << awsize + // bus_size: AXI bus width in bytes + // masks: write strobes, len=bus_size + // data: write data, len=bus_size + pub fn write_mem_axi( + &mut self, + addr: u32, + size: u32, + bus_size: u32, + masks: &[bool], + data: &[u8], + ) { + assert!( + addr % size == 0 && bus_size % size == 0, + "unaligned write access addr={addr:#x} size={size}B dlen={bus_size}B" + ); + + // handle strb=0 AXI payload + if !masks.iter().any(|&x| x) { + trace!("Mask 0 write detect"); + return; + } + + // TODO: we do not check strobe is compatible with (addr, awsize) + let addr_align = addr & ((!bus_size) + 1); + + let bus_size = bus_size as usize; + assert_eq!(bus_size, masks.len()); + assert_eq!(bus_size, data.len()); + + for i in 0..bus_size { + if masks[i] { + self.mem[addr_align as usize + i] = data[i]; + } + } + } +} + +pub(crate) struct Driver { + // SvScope from t1_cosim_init + scope: SvScope, + + #[cfg(feature = "trace")] + wave_path: String, + #[cfg(feature = "trace")] + dump_start: u64, + #[cfg(feature = "trace")] + dump_end: u64, + #[cfg(feature = "trace")] + dump_started: bool, + + pub(crate) dlen: u32, + pub(crate) e_entry: u64, + + shadow_mem: ShadowMem, +} + +#[cfg(feature = "trace")] +fn parse_range(input: &str) -> (u64, u64) { + if input.is_empty() { + return (0, 0); + } + + let parts: Vec<&str> = input.split(",").collect(); + + if parts.len() != 1 && parts.len() != 2 { + error!("invalid dump wave range: `{input}` was given"); + return (0, 0); + } + + const INVALID_NUMBER: &'static str = "invalid number"; + + if parts.len() == 1 { + return (parts[0].parse().expect(INVALID_NUMBER), 0); + } + + if parts[0].is_empty() { + return (0, parts[1].parse().expect(INVALID_NUMBER)); + } + + let start = 
parts[0].parse().expect(INVALID_NUMBER); + let end = parts[1].parse().expect(INVALID_NUMBER); + if start > end { + panic!("dump start is larger than end: `{input}`"); + } + + (start, end) +} + +impl Driver { + pub(crate) fn new(scope: SvScope, args: &OfflineArgs) -> Self { + #[cfg(feature = "trace")] + let (dump_start, dump_end) = parse_range(&args.dump_range); + + // pass e_entry to rocket + let (e_entry, shadow_mem, fn_sym_tab) = + Self::load_elf(&args.common_args.elf_file).expect("fail creating simulator"); + + Self { + scope, + + #[cfg(feature = "trace")] + wave_path: args.wave_path.to_owned(), + #[cfg(feature = "trace")] + dump_start, + #[cfg(feature = "trace")] + dump_end, + #[cfg(feature = "trace")] + dump_started: false, + + dlen: args.common_args.dlen, + e_entry, + + shadow_mem, + } + } + + pub fn load_elf(path: &Path) -> anyhow::Result<(u64, ShadowMem, FunctionSymTab)> { + let file = fs::File::open(path).with_context(|| "reading ELF file")?; + let mut elf: ElfStream = + ElfStream::open_stream(&file).with_context(|| "parsing ELF file")?; + + if elf.ehdr.e_machine != EM_RISCV { + anyhow::bail!("ELF is not in RISC-V"); + } + + if elf.ehdr.e_type != ET_EXEC { + anyhow::bail!("ELF is not an executable"); + } + + if elf.ehdr.e_phnum == 0 { + anyhow::bail!("ELF has zero size program header"); + } + + debug!("ELF entry: 0x{:x}", elf.ehdr.e_entry); + let mut mem = ShadowMem::new(); + elf.segments().iter().filter(|phdr| phdr.p_type == PT_LOAD).for_each(|phdr| { + let vaddr: usize = phdr.p_vaddr.try_into().expect("fail converting vaddr(u64) to usize"); + let filesz: usize = phdr.p_filesz.try_into().expect("fail converting p_filesz(u64) to usize"); + debug!( + "Read loadable segments 0x{:x}..0x{:x} to memory 0x{:x}", + phdr.p_offset, + phdr.p_offset + filesz as u64, + vaddr + ); + + // Load file start from offset into given mem slice + // The `offset` of the read_at method is relative to the start of the file and thus independent from the current cursor. 
+ let mem_slice = &mut mem.mem[vaddr..vaddr + filesz]; + file.read_at(mem_slice, phdr.p_offset).unwrap_or_else(|err| { + panic!( + "fail reading ELF into mem with vaddr={}, filesz={}, offset={}. Error detail: {}", + vaddr, filesz, phdr.p_offset, err + ) + }); + }); + + // FIXME: now the symbol table doesn't contain any function value + let mut fn_sym_tab = FunctionSymTab::new(); + let symbol_table = + elf.symbol_table().with_context(|| "reading symbol table(SHT_SYMTAB) from ELF")?; + if let Some((parsed_table, string_table)) = symbol_table { + parsed_table + .iter() + // st_symtype = symbol.st_info & 0xf (But why masking here?) + .filter(|sym| sym.st_symtype() == STT_FUNC) + .for_each(|sym| { + let name = string_table + .get(sym.st_name as usize) + .unwrap_or_else(|_| panic!("fail to get name at st_name={}", sym.st_name)); + fn_sym_tab.insert( + sym.st_value, + FunctionSym { name: name.to_string(), info: sym.st_symtype() }, + ); + }); + } else { + debug!("load_elf: symtab not found"); + }; + + Ok((elf.ehdr.e_entry, mem, fn_sym_tab)) + } + + pub(crate) fn axi_read_high_bandwidth(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + let data = self.shadow_mem.read_mem_axi(addr, size, self.dlen / 8); + let data_hex = hex::encode(&data); + trace!( + "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_high_bandwidth( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + + self.shadow_mem.write_mem_axi(addr, size, self.dlen / 8, &strobe, data); + let data_hex = hex::encode(data); + trace!( + "[{}] axi_write_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_indexed(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + assert!(size <= 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let 
data_hex = hex::encode(&data); + trace!( + "[{}] axi_read_indexed (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_indexed_access( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + self.shadow_mem.write_mem_axi(addr, size, 4, strobe, data); + let data_hex = hex::encode(data); + trace!( + "[{}] axi_write_indexed_access (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + assert!(size <= 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let data_hex = hex::encode(&data); + trace!( + "[{}] axi_read_load_store (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + pub(crate) fn axi_write_load_store( + &mut self, + addr: u32, + awsize: u64, + strobe: &[bool], + data: &[u8], + ) { + let size = 1 << awsize; + self.shadow_mem.write_mem_axi(addr, size, 4, strobe, data); + let data_hex = hex::encode(data); + trace!( + "[{}] axi_write_load_store (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + } + + pub(crate) fn axi_read_instruction_fetch(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + let size = 1 << arsize; + assert!(size <= 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let data_hex = hex::encode(&data); + trace!( + "[{}] axi_read_instruction_fetch (addr={addr:#x}, size={size}, data={data_hex})", + get_t() + ); + AxiReadPayload { data } + } + + #[cfg(feature = "trace")] + fn start_dump_wave(&mut self) { + dump_wave(self.scope, &self.wave_path); + } +} diff --git a/t1rocketemu/online_dpi/src/lib.rs b/t1rocketemu/online_dpi/src/lib.rs new file mode 100644 index 000000000..bafe6db0d --- /dev/null +++ b/t1rocketemu/online_dpi/src/lib.rs @@ -0,0 +1,44 @@ +use clap::Parser; +use common::CommonArgs; + +pub mod dpi; 
+pub mod drive; +pub mod svdpi; +#[cfg(feature = "svvpi")] +pub mod svvpi; + +#[derive(Parser)] +pub(crate) struct OfflineArgs { + #[command(flatten)] + pub common_args: CommonArgs, + + #[cfg(feature = "trace")] + #[arg(long)] + pub wave_path: String, + + #[cfg(feature = "trace")] + #[arg(long, default_value = "")] + pub dump_range: String, + + #[arg(long, default_value_t = 1000000)] + pub timeout: u64, +} + +// keep in sync with TestBench.ClockGen +pub const CYCLE_PERIOD: u64 = 20; + +/// get cycle +#[cfg(any(feature = "sv2023", feature = "svvpi"))] +pub fn get_t() -> u64 { + get_time() / CYCLE_PERIOD +} + +#[cfg(feature = "sv2023")] +pub fn get_time() -> u64 { + svdpi::get_time() +} + +#[cfg(all(not(feature = "sv2023"), feature = "svvpi"))] +pub fn get_time() -> u64 { + svvpi::get_time() +} \ No newline at end of file diff --git a/t1rocketemu/online_dpi/src/svdpi.rs b/t1rocketemu/online_dpi/src/svdpi.rs new file mode 100644 index 000000000..227626d79 --- /dev/null +++ b/t1rocketemu/online_dpi/src/svdpi.rs @@ -0,0 +1,50 @@ +use std::{ffi::{c_void, CString}, ptr::{self, NonNull}}; + +#[rustfmt::skip] +pub mod sys; + +/// get current simulation time in _simulation time unit_ +#[cfg(feature = "sv2023")] +pub fn get_time() -> u64 { + let mut time = sys::svTimeVal { + type_: sys::sv_sim_time as i32, + high: 0, + low: 0, + real: 0.0, + }; + unsafe { + let ret = sys::svGetTime(ptr::null_mut(), &mut time); + assert!(ret == 0, "svGetTime failed"); + } + + ((time.high as u64) << 32) + (time.low as u64) +} + +pub fn set_scope_by_name(name: &str) { + let name_cstr = CString::new(name).unwrap(); + unsafe { + let scope = sys::svGetScopeFromName(name_cstr.as_ptr()); + assert!(!scope.is_null(), "unrecognized scope `{name}`"); + sys::svSetScope(scope); + } +} + +pub fn set_scope(scope: SvScope) { + unsafe { + sys::svSetScope(scope.ptr.as_ptr()); + } +} + +#[derive(Debug, Clone, Copy)] +pub struct SvScope { + ptr: NonNull, +} + +unsafe impl Send for SvScope {} + +impl SvScope { + 
pub fn get_current() -> Option { + let ptr = unsafe { sys::svGetScope() }; + NonNull::new(ptr).map(|ptr| Self { ptr }) + } +} diff --git a/t1rocketemu/online_dpi/src/svdpi/sys.rs b/t1rocketemu/online_dpi/src/svdpi/sys.rs new file mode 100644 index 000000000..892d7534b --- /dev/null +++ b/t1rocketemu/online_dpi/src/svdpi/sys.rs @@ -0,0 +1,750 @@ +// modified from `bindgen --allowlist-item 'sv.*' svdpi.h` +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/* automatically generated by rust-bindgen 0.69.4 */ + +pub const sv_0: u32 = 0; +pub const sv_1: u32 = 1; +pub const sv_z: u32 = 2; +pub const sv_x: u32 = 3; +pub const sv_scaled_real_time: u32 = 1; +pub const sv_sim_time: u32 = 2; +pub type svScalar = u8; +pub type svBit = svScalar; +pub type svLogic = svScalar; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vecval { + pub aval: u32, + pub bval: u32, +} +#[test] +fn bindgen_test_layout_t_vpi_vecval() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).aval) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(aval) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).bval) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(bval) + ) + ); +} +pub type s_vpi_vecval = t_vpi_vecval; +pub type svLogicVecVal = s_vpi_vecval; +pub type svBitVecVal = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_time { + pub type_: i32, + pub high: u32, + pub low: u32, + pub real: f64, +} +#[test] +fn bindgen_test_layout_t_vpi_time() { + const 
UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(t_vpi_time)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_time)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).high) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(high) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).low) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(low) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(real) + ) + ); +} +pub type s_vpi_time = t_vpi_time; +pub type svTimeVal = s_vpi_time; +extern "C" { + pub fn svDpiVersion() -> *const ::std::os::raw::c_char; +} +pub type svScope = *mut ::std::os::raw::c_void; +pub type svOpenArrayHandle = *mut ::std::os::raw::c_void; +extern "C" { + pub fn svGetBitselBit(s: *const svBitVecVal, i: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetBitselLogic(s: *const svLogicVecVal, i: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svPutBitselBit(d: *mut svBitVecVal, i: ::std::os::raw::c_int, s: svBit); +} +extern "C" { + pub fn svPutBitselLogic(d: *mut svLogicVecVal, i: ::std::os::raw::c_int, s: svLogic); +} +extern "C" { + pub fn svGetPartselBit( + d: *mut svBitVecVal, + s: *const svBitVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetPartselLogic( + d: *mut svLogicVecVal, + s: *const svLogicVecVal, + i: 
::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartselBit( + d: *mut svBitVecVal, + s: svBitVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartselLogic( + d: *mut svLogicVecVal, + s: svLogicVecVal, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svLeft(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svRight(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svLow(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svHigh(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svIncrement(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svSize(h: svOpenArrayHandle, d: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svDimensions(h: svOpenArrayHandle) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetArrayPtr(arg1: svOpenArrayHandle) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svSizeOfArray(arg1: svOpenArrayHandle) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetArrElemPtr( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... 
+ ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr1( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr2( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetArrElemPtr3( + arg1: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svPutBitArrElemVecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutBitArrElem1VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem2VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3VecVal( + d: svOpenArrayHandle, + s: *const svBitVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElemVecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutLogicArrElem1VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem2VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3VecVal( + d: svOpenArrayHandle, + s: *const svLogicVecVal, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElemVecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... 
+ ); +} +extern "C" { + pub fn svGetBitArrElem1VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem2VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem3VecVal( + d: *mut svBitVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElemVecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svGetLogicArrElem1VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem2VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem3VecVal( + d: *mut svLogicVecVal, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int, ...) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem1(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem2( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> svBit; +} +extern "C" { + pub fn svGetBitArrElem3( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> svBit; +} +extern "C" { + pub fn svGetLogicArrElem(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int, ...) 
-> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem1(s: svOpenArrayHandle, indx1: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem2( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ) -> svLogic; +} +extern "C" { + pub fn svGetLogicArrElem3( + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ) -> svLogic; +} +extern "C" { + pub fn svPutLogicArrElem( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutLogicArrElem1(d: svOpenArrayHandle, value: svLogic, indx1: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutLogicArrElem2( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3( + d: svOpenArrayHandle, + value: svLogic, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem(d: svOpenArrayHandle, value: svBit, indx1: ::std::os::raw::c_int, ...); +} +extern "C" { + pub fn svPutBitArrElem1(d: svOpenArrayHandle, value: svBit, indx1: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutBitArrElem2( + d: svOpenArrayHandle, + value: svBit, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3( + d: svOpenArrayHandle, + value: svBit, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetScope() -> svScope; +} +extern "C" { + pub fn svSetScope(scope: svScope) -> svScope; +} +extern "C" { + pub fn svGetNameFromScope(arg1: svScope) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn svGetScopeFromName(scopeName: *const ::std::os::raw::c_char) -> svScope; +} +extern "C" { + pub fn svPutUserData( + scope: svScope, + userKey: *mut 
::std::os::raw::c_void, + userData: *mut ::std::os::raw::c_void, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svGetUserData( + scope: svScope, + userKey: *mut ::std::os::raw::c_void, + ) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn svGetCallerInfo( + fileName: *mut *const ::std::os::raw::c_char, + lineNumber: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svIsDisabledState() -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svAckDisabledState(); +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTime(scope: svScope, time: *mut svTimeVal) -> ::std::os::raw::c_int; +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTimeUnit(scope: svScope, time_unit: *mut i32) -> ::std::os::raw::c_int; +} +#[cfg(feature = "sv2023")] +extern "C" { + pub fn svGetTimePrecision(scope: svScope, time_precision: *mut i32) -> ::std::os::raw::c_int; +} +pub type svBitVec32 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct svLogicVec32 { + pub c: ::std::os::raw::c_uint, + pub d: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_svLogicVec32() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(svLogicVec32)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(svLogicVec32)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).c) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(svLogicVec32), + "::", + stringify!(c) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).d) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(svLogicVec32), + "::", + stringify!(d) + ) + ); +} +pub type svBitPackedArrRef = *mut ::std::os::raw::c_void; +pub type svLogicPackedArrRef = *mut ::std::os::raw::c_void; +extern "C" { + pub fn 
svSizeOfBitPackedArr(width: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svSizeOfLogicPackedArr(width: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn svPutBitVec32(d: svBitPackedArrRef, s: *const svBitVec32, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svPutLogicVec32( + d: svLogicPackedArrRef, + s: *const svLogicVec32, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitVec32(d: *mut svBitVec32, s: svBitPackedArrRef, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svGetLogicVec32(d: *mut svLogicVec32, s: svLogicPackedArrRef, w: ::std::os::raw::c_int); +} +extern "C" { + pub fn svGetSelectBit(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> svBit; +} +extern "C" { + pub fn svGetSelectLogic(s: svLogicPackedArrRef, i: ::std::os::raw::c_int) -> svLogic; +} +extern "C" { + pub fn svPutSelectBit(d: svBitPackedArrRef, i: ::std::os::raw::c_int, s: svBit); +} +extern "C" { + pub fn svPutSelectLogic(d: svLogicPackedArrRef, i: ::std::os::raw::c_int, s: svLogic); +} +extern "C" { + pub fn svGetPartSelectBit( + d: *mut svBitVec32, + s: svBitPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBits( + s: svBitPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ) -> svBitVec32; +} +extern "C" { + pub fn svGet32Bits(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> svBitVec32; +} +extern "C" { + pub fn svGet64Bits(s: svBitPackedArrRef, i: ::std::os::raw::c_int) -> u64; +} +extern "C" { + pub fn svGetPartSelectLogic( + d: *mut svLogicVec32, + s: svLogicPackedArrRef, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartSelectBit( + d: svBitPackedArrRef, + s: svBitVec32, + i: ::std::os::raw::c_int, + w: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutPartSelectLogic( + d: svLogicPackedArrRef, + s: *const svLogicVec32, + i: ::std::os::raw::c_int, + w: 
::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElemVec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutBitArrElem1Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem2Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutBitArrElem3Vec32( + d: svOpenArrayHandle, + s: *const svBitVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElemVec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svPutLogicArrElem1Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem2Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svPutLogicArrElem3Vec32( + d: svOpenArrayHandle, + s: *const svLogicVec32, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElemVec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... 
+ ); +} +extern "C" { + pub fn svGetBitArrElem1Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem2Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetBitArrElem3Vec32( + d: *mut svBitVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElemVec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ... + ); +} +extern "C" { + pub fn svGetLogicArrElem1Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem2Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + ); +} +extern "C" { + pub fn svGetLogicArrElem3Vec32( + d: *mut svLogicVec32, + s: svOpenArrayHandle, + indx1: ::std::os::raw::c_int, + indx2: ::std::os::raw::c_int, + indx3: ::std::os::raw::c_int, + ); +} diff --git a/t1rocketemu/online_dpi/src/svvpi.rs b/t1rocketemu/online_dpi/src/svvpi.rs new file mode 100644 index 000000000..401f7f65a --- /dev/null +++ b/t1rocketemu/online_dpi/src/svvpi.rs @@ -0,0 +1,18 @@ +#[rustfmt::skip] +pub mod sys; + +use std::ptr; + +/// get current simulation time in _simulation time unit_ +pub fn get_time() -> u64 { + let mut time = sys::s_vpi_time { + type_: sys::vpiSimTime as i32, + high: 0, + low: 0, + real: 0.0, + }; + unsafe { + sys::vpi_get_time(ptr::null_mut(), &mut time); + } + ((time.high as u64) << 32) + (time.low as u64) +} diff --git a/t1rocketemu/online_dpi/src/svvpi/sys.rs b/t1rocketemu/online_dpi/src/svvpi/sys.rs new file mode 100644 index 000000000..c3d269855 --- /dev/null +++ b/t1rocketemu/online_dpi/src/svvpi/sys.rs @@ -0,0 +1,2102 @@ +// modified from `bindgen --allowlist-item 'vpi.*' 
sv_vpi_user.h` +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/* automatically generated by rust-bindgen 0.69.4 */ + +pub const vpiAlways: u32 = 1; +pub const vpiAssignStmt: u32 = 2; +pub const vpiAssignment: u32 = 3; +pub const vpiBegin: u32 = 4; +pub const vpiCase: u32 = 5; +pub const vpiCaseItem: u32 = 6; +pub const vpiConstant: u32 = 7; +pub const vpiContAssign: u32 = 8; +pub const vpiDeassign: u32 = 9; +pub const vpiDefParam: u32 = 10; +pub const vpiDelayControl: u32 = 11; +pub const vpiDisable: u32 = 12; +pub const vpiEventControl: u32 = 13; +pub const vpiEventStmt: u32 = 14; +pub const vpiFor: u32 = 15; +pub const vpiForce: u32 = 16; +pub const vpiForever: u32 = 17; +pub const vpiFork: u32 = 18; +pub const vpiFuncCall: u32 = 19; +pub const vpiFunction: u32 = 20; +pub const vpiGate: u32 = 21; +pub const vpiIf: u32 = 22; +pub const vpiIfElse: u32 = 23; +pub const vpiInitial: u32 = 24; +pub const vpiIntegerVar: u32 = 25; +pub const vpiInterModPath: u32 = 26; +pub const vpiIterator: u32 = 27; +pub const vpiIODecl: u32 = 28; +pub const vpiMemory: u32 = 29; +pub const vpiMemoryWord: u32 = 30; +pub const vpiModPath: u32 = 31; +pub const vpiModule: u32 = 32; +pub const vpiNamedBegin: u32 = 33; +pub const vpiNamedEvent: u32 = 34; +pub const vpiNamedFork: u32 = 35; +pub const vpiNet: u32 = 36; +pub const vpiNetBit: u32 = 37; +pub const vpiNullStmt: u32 = 38; +pub const vpiOperation: u32 = 39; +pub const vpiParamAssign: u32 = 40; +pub const vpiParameter: u32 = 41; +pub const vpiPartSelect: u32 = 42; +pub const vpiPathTerm: u32 = 43; +pub const vpiPort: u32 = 44; +pub const vpiPortBit: u32 = 45; +pub const vpiPrimTerm: u32 = 46; +pub const vpiRealVar: u32 = 47; +pub const vpiReg: u32 = 48; +pub const vpiRegBit: u32 = 49; +pub const vpiRelease: u32 = 50; +pub const vpiRepeat: u32 = 51; +pub const vpiRepeatControl: u32 = 52; +pub const vpiSchedEvent: u32 = 53; +pub const vpiSpecParam: u32 = 54; +pub const vpiSwitch: u32 = 
55; +pub const vpiSysFuncCall: u32 = 56; +pub const vpiSysTaskCall: u32 = 57; +pub const vpiTableEntry: u32 = 58; +pub const vpiTask: u32 = 59; +pub const vpiTaskCall: u32 = 60; +pub const vpiTchk: u32 = 61; +pub const vpiTchkTerm: u32 = 62; +pub const vpiTimeVar: u32 = 63; +pub const vpiTimeQueue: u32 = 64; +pub const vpiUdp: u32 = 65; +pub const vpiUdpDefn: u32 = 66; +pub const vpiUserSystf: u32 = 67; +pub const vpiVarSelect: u32 = 68; +pub const vpiWait: u32 = 69; +pub const vpiWhile: u32 = 70; +pub const vpiAttribute: u32 = 105; +pub const vpiBitSelect: u32 = 106; +pub const vpiCallback: u32 = 107; +pub const vpiDelayTerm: u32 = 108; +pub const vpiDelayDevice: u32 = 109; +pub const vpiFrame: u32 = 110; +pub const vpiGateArray: u32 = 111; +pub const vpiModuleArray: u32 = 112; +pub const vpiPrimitiveArray: u32 = 113; +pub const vpiNetArray: u32 = 114; +pub const vpiRange: u32 = 115; +pub const vpiRegArray: u32 = 116; +pub const vpiSwitchArray: u32 = 117; +pub const vpiUdpArray: u32 = 118; +pub const vpiContAssignBit: u32 = 128; +pub const vpiNamedEventArray: u32 = 129; +pub const vpiIndexedPartSelect: u32 = 130; +pub const vpiGenScopeArray: u32 = 133; +pub const vpiGenScope: u32 = 134; +pub const vpiGenVar: u32 = 135; +pub const vpiCondition: u32 = 71; +pub const vpiDelay: u32 = 72; +pub const vpiElseStmt: u32 = 73; +pub const vpiForIncStmt: u32 = 74; +pub const vpiForInitStmt: u32 = 75; +pub const vpiHighConn: u32 = 76; +pub const vpiLhs: u32 = 77; +pub const vpiIndex: u32 = 78; +pub const vpiLeftRange: u32 = 79; +pub const vpiLowConn: u32 = 80; +pub const vpiParent: u32 = 81; +pub const vpiRhs: u32 = 82; +pub const vpiRightRange: u32 = 83; +pub const vpiScope: u32 = 84; +pub const vpiSysTfCall: u32 = 85; +pub const vpiTchkDataTerm: u32 = 86; +pub const vpiTchkNotifier: u32 = 87; +pub const vpiTchkRefTerm: u32 = 88; +pub const vpiArgument: u32 = 89; +pub const vpiBit: u32 = 90; +pub const vpiDriver: u32 = 91; +pub const vpiInternalScope: u32 = 92; +pub const 
vpiLoad: u32 = 93; +pub const vpiModDataPathIn: u32 = 94; +pub const vpiModPathIn: u32 = 95; +pub const vpiModPathOut: u32 = 96; +pub const vpiOperand: u32 = 97; +pub const vpiPortInst: u32 = 98; +pub const vpiProcess: u32 = 99; +pub const vpiVariables: u32 = 100; +pub const vpiUse: u32 = 101; +pub const vpiExpr: u32 = 102; +pub const vpiPrimitive: u32 = 103; +pub const vpiStmt: u32 = 104; +pub const vpiActiveTimeFormat: u32 = 119; +pub const vpiInTerm: u32 = 120; +pub const vpiInstanceArray: u32 = 121; +pub const vpiLocalDriver: u32 = 122; +pub const vpiLocalLoad: u32 = 123; +pub const vpiOutTerm: u32 = 124; +pub const vpiPorts: u32 = 125; +pub const vpiSimNet: u32 = 126; +pub const vpiTaskFunc: u32 = 127; +pub const vpiBaseExpr: u32 = 131; +pub const vpiWidthExpr: u32 = 132; +pub const vpiAutomatics: u32 = 136; +pub const vpiUndefined: i32 = -1; +pub const vpiType: u32 = 1; +pub const vpiName: u32 = 2; +pub const vpiFullName: u32 = 3; +pub const vpiSize: u32 = 4; +pub const vpiFile: u32 = 5; +pub const vpiLineNo: u32 = 6; +pub const vpiTopModule: u32 = 7; +pub const vpiCellInstance: u32 = 8; +pub const vpiDefName: u32 = 9; +pub const vpiProtected: u32 = 10; +pub const vpiTimeUnit: u32 = 11; +pub const vpiTimePrecision: u32 = 12; +pub const vpiDefNetType: u32 = 13; +pub const vpiUnconnDrive: u32 = 14; +pub const vpiHighZ: u32 = 1; +pub const vpiPull1: u32 = 2; +pub const vpiPull0: u32 = 3; +pub const vpiDefFile: u32 = 15; +pub const vpiDefLineNo: u32 = 16; +pub const vpiDefDelayMode: u32 = 47; +pub const vpiDelayModeNone: u32 = 1; +pub const vpiDelayModePath: u32 = 2; +pub const vpiDelayModeDistrib: u32 = 3; +pub const vpiDelayModeUnit: u32 = 4; +pub const vpiDelayModeZero: u32 = 5; +pub const vpiDelayModeMTM: u32 = 6; +pub const vpiDefDecayTime: u32 = 48; +pub const vpiScalar: u32 = 17; +pub const vpiVector: u32 = 18; +pub const vpiExplicitName: u32 = 19; +pub const vpiDirection: u32 = 20; +pub const vpiInput: u32 = 1; +pub const vpiOutput: u32 = 2; +pub const 
vpiInout: u32 = 3; +pub const vpiMixedIO: u32 = 4; +pub const vpiNoDirection: u32 = 5; +pub const vpiConnByName: u32 = 21; +pub const vpiNetType: u32 = 22; +pub const vpiWire: u32 = 1; +pub const vpiWand: u32 = 2; +pub const vpiWor: u32 = 3; +pub const vpiTri: u32 = 4; +pub const vpiTri0: u32 = 5; +pub const vpiTri1: u32 = 6; +pub const vpiTriReg: u32 = 7; +pub const vpiTriAnd: u32 = 8; +pub const vpiTriOr: u32 = 9; +pub const vpiSupply1: u32 = 10; +pub const vpiSupply0: u32 = 11; +pub const vpiNone: u32 = 12; +pub const vpiUwire: u32 = 13; +pub const vpiNettypeNet: u32 = 14; +pub const vpiNettypeNetSelect: u32 = 15; +pub const vpiInterconnect: u32 = 16; +pub const vpiExplicitScalared: u32 = 23; +pub const vpiExplicitVectored: u32 = 24; +pub const vpiExpanded: u32 = 25; +pub const vpiImplicitDecl: u32 = 26; +pub const vpiChargeStrength: u32 = 27; +pub const vpiArray: u32 = 28; +pub const vpiPortIndex: u32 = 29; +pub const vpiTermIndex: u32 = 30; +pub const vpiStrength0: u32 = 31; +pub const vpiStrength1: u32 = 32; +pub const vpiPrimType: u32 = 33; +pub const vpiAndPrim: u32 = 1; +pub const vpiNandPrim: u32 = 2; +pub const vpiNorPrim: u32 = 3; +pub const vpiOrPrim: u32 = 4; +pub const vpiXorPrim: u32 = 5; +pub const vpiXnorPrim: u32 = 6; +pub const vpiBufPrim: u32 = 7; +pub const vpiNotPrim: u32 = 8; +pub const vpiBufif0Prim: u32 = 9; +pub const vpiBufif1Prim: u32 = 10; +pub const vpiNotif0Prim: u32 = 11; +pub const vpiNotif1Prim: u32 = 12; +pub const vpiNmosPrim: u32 = 13; +pub const vpiPmosPrim: u32 = 14; +pub const vpiCmosPrim: u32 = 15; +pub const vpiRnmosPrim: u32 = 16; +pub const vpiRpmosPrim: u32 = 17; +pub const vpiRcmosPrim: u32 = 18; +pub const vpiRtranPrim: u32 = 19; +pub const vpiRtranif0Prim: u32 = 20; +pub const vpiRtranif1Prim: u32 = 21; +pub const vpiTranPrim: u32 = 22; +pub const vpiTranif0Prim: u32 = 23; +pub const vpiTranif1Prim: u32 = 24; +pub const vpiPullupPrim: u32 = 25; +pub const vpiPulldownPrim: u32 = 26; +pub const vpiSeqPrim: u32 = 27; 
+pub const vpiCombPrim: u32 = 28; +pub const vpiPolarity: u32 = 34; +pub const vpiDataPolarity: u32 = 35; +pub const vpiPositive: u32 = 1; +pub const vpiNegative: u32 = 2; +pub const vpiUnknown: u32 = 3; +pub const vpiEdge: u32 = 36; +pub const vpiNoEdge: u32 = 0; +pub const vpiEdge01: u32 = 1; +pub const vpiEdge10: u32 = 2; +pub const vpiEdge0x: u32 = 4; +pub const vpiEdgex1: u32 = 8; +pub const vpiEdge1x: u32 = 16; +pub const vpiEdgex0: u32 = 32; +pub const vpiPosedge: u32 = 13; +pub const vpiNegedge: u32 = 50; +pub const vpiAnyEdge: u32 = 63; +pub const vpiPathType: u32 = 37; +pub const vpiPathFull: u32 = 1; +pub const vpiPathParallel: u32 = 2; +pub const vpiTchkType: u32 = 38; +pub const vpiSetup: u32 = 1; +pub const vpiHold: u32 = 2; +pub const vpiPeriod: u32 = 3; +pub const vpiWidth: u32 = 4; +pub const vpiSkew: u32 = 5; +pub const vpiRecovery: u32 = 6; +pub const vpiNoChange: u32 = 7; +pub const vpiSetupHold: u32 = 8; +pub const vpiFullskew: u32 = 9; +pub const vpiRecrem: u32 = 10; +pub const vpiRemoval: u32 = 11; +pub const vpiTimeskew: u32 = 12; +pub const vpiOpType: u32 = 39; +pub const vpiMinusOp: u32 = 1; +pub const vpiPlusOp: u32 = 2; +pub const vpiNotOp: u32 = 3; +pub const vpiBitNegOp: u32 = 4; +pub const vpiUnaryAndOp: u32 = 5; +pub const vpiUnaryNandOp: u32 = 6; +pub const vpiUnaryOrOp: u32 = 7; +pub const vpiUnaryNorOp: u32 = 8; +pub const vpiUnaryXorOp: u32 = 9; +pub const vpiUnaryXNorOp: u32 = 10; +pub const vpiSubOp: u32 = 11; +pub const vpiDivOp: u32 = 12; +pub const vpiModOp: u32 = 13; +pub const vpiEqOp: u32 = 14; +pub const vpiNeqOp: u32 = 15; +pub const vpiCaseEqOp: u32 = 16; +pub const vpiCaseNeqOp: u32 = 17; +pub const vpiGtOp: u32 = 18; +pub const vpiGeOp: u32 = 19; +pub const vpiLtOp: u32 = 20; +pub const vpiLeOp: u32 = 21; +pub const vpiLShiftOp: u32 = 22; +pub const vpiRShiftOp: u32 = 23; +pub const vpiAddOp: u32 = 24; +pub const vpiMultOp: u32 = 25; +pub const vpiLogAndOp: u32 = 26; +pub const vpiLogOrOp: u32 = 27; +pub const 
vpiBitAndOp: u32 = 28; +pub const vpiBitOrOp: u32 = 29; +pub const vpiBitXorOp: u32 = 30; +pub const vpiBitXNorOp: u32 = 31; +pub const vpiBitXnorOp: u32 = 31; +pub const vpiConditionOp: u32 = 32; +pub const vpiConcatOp: u32 = 33; +pub const vpiMultiConcatOp: u32 = 34; +pub const vpiEventOrOp: u32 = 35; +pub const vpiNullOp: u32 = 36; +pub const vpiListOp: u32 = 37; +pub const vpiMinTypMaxOp: u32 = 38; +pub const vpiPosedgeOp: u32 = 39; +pub const vpiNegedgeOp: u32 = 40; +pub const vpiArithLShiftOp: u32 = 41; +pub const vpiArithRShiftOp: u32 = 42; +pub const vpiPowerOp: u32 = 43; +pub const vpiConstType: u32 = 40; +pub const vpiDecConst: u32 = 1; +pub const vpiRealConst: u32 = 2; +pub const vpiBinaryConst: u32 = 3; +pub const vpiOctConst: u32 = 4; +pub const vpiHexConst: u32 = 5; +pub const vpiStringConst: u32 = 6; +pub const vpiIntConst: u32 = 7; +pub const vpiTimeConst: u32 = 8; +pub const vpiBlocking: u32 = 41; +pub const vpiCaseType: u32 = 42; +pub const vpiCaseExact: u32 = 1; +pub const vpiCaseX: u32 = 2; +pub const vpiCaseZ: u32 = 3; +pub const vpiNetDeclAssign: u32 = 43; +pub const vpiFuncType: u32 = 44; +pub const vpiIntFunc: u32 = 1; +pub const vpiRealFunc: u32 = 2; +pub const vpiTimeFunc: u32 = 3; +pub const vpiSizedFunc: u32 = 4; +pub const vpiSizedSignedFunc: u32 = 5; +pub const vpiSysFuncType: u32 = 44; +pub const vpiSysFuncInt: u32 = 1; +pub const vpiSysFuncReal: u32 = 2; +pub const vpiSysFuncTime: u32 = 3; +pub const vpiSysFuncSized: u32 = 4; +pub const vpiUserDefn: u32 = 45; +pub const vpiScheduled: u32 = 46; +pub const vpiActive: u32 = 49; +pub const vpiAutomatic: u32 = 50; +pub const vpiCell: u32 = 51; +pub const vpiConfig: u32 = 52; +pub const vpiConstantSelect: u32 = 53; +pub const vpiDecompile: u32 = 54; +pub const vpiDefAttribute: u32 = 55; +pub const vpiDelayType: u32 = 56; +pub const vpiModPathDelay: u32 = 1; +pub const vpiInterModPathDelay: u32 = 2; +pub const vpiMIPDelay: u32 = 3; +pub const vpiIteratorType: u32 = 57; +pub const 
vpiLibrary: u32 = 58; +pub const vpiOffset: u32 = 60; +pub const vpiResolvedNetType: u32 = 61; +pub const vpiSaveRestartID: u32 = 62; +pub const vpiSaveRestartLocation: u32 = 63; +pub const vpiValid: u32 = 64; +pub const vpiValidFalse: u32 = 0; +pub const vpiValidTrue: u32 = 1; +pub const vpiSigned: u32 = 65; +pub const vpiLocalParam: u32 = 70; +pub const vpiModPathHasIfNone: u32 = 71; +pub const vpiIndexedPartSelectType: u32 = 72; +pub const vpiPosIndexed: u32 = 1; +pub const vpiNegIndexed: u32 = 2; +pub const vpiIsMemory: u32 = 73; +pub const vpiIsProtected: u32 = 74; +pub const vpiStop: u32 = 66; +pub const vpiFinish: u32 = 67; +pub const vpiReset: u32 = 68; +pub const vpiSetInteractiveScope: u32 = 69; +pub const vpiScaledRealTime: u32 = 1; +pub const vpiSimTime: u32 = 2; +pub const vpiSuppressTime: u32 = 3; +pub const vpiSupplyDrive: u32 = 128; +pub const vpiStrongDrive: u32 = 64; +pub const vpiPullDrive: u32 = 32; +pub const vpiWeakDrive: u32 = 8; +pub const vpiLargeCharge: u32 = 16; +pub const vpiMediumCharge: u32 = 4; +pub const vpiSmallCharge: u32 = 2; +pub const vpiHiZ: u32 = 1; +pub const vpiBinStrVal: u32 = 1; +pub const vpiOctStrVal: u32 = 2; +pub const vpiDecStrVal: u32 = 3; +pub const vpiHexStrVal: u32 = 4; +pub const vpiScalarVal: u32 = 5; +pub const vpiIntVal: u32 = 6; +pub const vpiRealVal: u32 = 7; +pub const vpiStringVal: u32 = 8; +pub const vpiVectorVal: u32 = 9; +pub const vpiStrengthVal: u32 = 10; +pub const vpiTimeVal: u32 = 11; +pub const vpiObjTypeVal: u32 = 12; +pub const vpiSuppressVal: u32 = 13; +pub const vpiShortIntVal: u32 = 14; +pub const vpiLongIntVal: u32 = 15; +pub const vpiShortRealVal: u32 = 16; +pub const vpiRawTwoStateVal: u32 = 17; +pub const vpiRawFourStateVal: u32 = 18; +pub const vpiNoDelay: u32 = 1; +pub const vpiInertialDelay: u32 = 2; +pub const vpiTransportDelay: u32 = 3; +pub const vpiPureTransportDelay: u32 = 4; +pub const vpiForceFlag: u32 = 5; +pub const vpiReleaseFlag: u32 = 6; +pub const vpiCancelEvent: u32 = 7; 
+pub const vpiReturnEvent: u32 = 4096; +pub const vpiUserAllocFlag: u32 = 8192; +pub const vpiOneValue: u32 = 16384; +pub const vpiPropagateOff: u32 = 32768; +pub const vpi0: u32 = 0; +pub const vpi1: u32 = 1; +pub const vpiZ: u32 = 2; +pub const vpiX: u32 = 3; +pub const vpiH: u32 = 4; +pub const vpiL: u32 = 5; +pub const vpiDontCare: u32 = 6; +pub const vpiSysTask: u32 = 1; +pub const vpiSysFunc: u32 = 2; +pub const vpiCompile: u32 = 1; +pub const vpiPLI: u32 = 2; +pub const vpiRun: u32 = 3; +pub const vpiNotice: u32 = 1; +pub const vpiWarning: u32 = 2; +pub const vpiError: u32 = 3; +pub const vpiSystem: u32 = 4; +pub const vpiInternal: u32 = 5; +pub const vpiPackage: u32 = 600; +pub const vpiInterface: u32 = 601; +pub const vpiProgram: u32 = 602; +pub const vpiInterfaceArray: u32 = 603; +pub const vpiProgramArray: u32 = 604; +pub const vpiTypespec: u32 = 605; +pub const vpiModport: u32 = 606; +pub const vpiInterfaceTfDecl: u32 = 607; +pub const vpiRefObj: u32 = 608; +pub const vpiTypeParameter: u32 = 609; +pub const vpiVarBit: u32 = 49; +pub const vpiLongIntVar: u32 = 610; +pub const vpiShortIntVar: u32 = 611; +pub const vpiIntVar: u32 = 612; +pub const vpiShortRealVar: u32 = 613; +pub const vpiByteVar: u32 = 614; +pub const vpiClassVar: u32 = 615; +pub const vpiStringVar: u32 = 616; +pub const vpiEnumVar: u32 = 617; +pub const vpiStructVar: u32 = 618; +pub const vpiUnionVar: u32 = 619; +pub const vpiBitVar: u32 = 620; +pub const vpiLogicVar: u32 = 48; +pub const vpiArrayVar: u32 = 116; +pub const vpiClassObj: u32 = 621; +pub const vpiChandleVar: u32 = 622; +pub const vpiPackedArrayVar: u32 = 623; +pub const vpiVirtualInterfaceVar: u32 = 728; +pub const vpiLongIntTypespec: u32 = 625; +pub const vpiShortRealTypespec: u32 = 626; +pub const vpiByteTypespec: u32 = 627; +pub const vpiShortIntTypespec: u32 = 628; +pub const vpiIntTypespec: u32 = 629; +pub const vpiClassTypespec: u32 = 630; +pub const vpiStringTypespec: u32 = 631; +pub const vpiChandleTypespec: u32 = 
632; +pub const vpiEnumTypespec: u32 = 633; +pub const vpiEnumConst: u32 = 634; +pub const vpiIntegerTypespec: u32 = 635; +pub const vpiTimeTypespec: u32 = 636; +pub const vpiRealTypespec: u32 = 637; +pub const vpiStructTypespec: u32 = 638; +pub const vpiUnionTypespec: u32 = 639; +pub const vpiBitTypespec: u32 = 640; +pub const vpiLogicTypespec: u32 = 641; +pub const vpiArrayTypespec: u32 = 642; +pub const vpiVoidTypespec: u32 = 643; +pub const vpiTypespecMember: u32 = 644; +pub const vpiPackedArrayTypespec: u32 = 692; +pub const vpiSequenceTypespec: u32 = 696; +pub const vpiPropertyTypespec: u32 = 697; +pub const vpiEventTypespec: u32 = 698; +pub const vpiInterfaceTypespec: u32 = 906; +pub const vpiClockingBlock: u32 = 650; +pub const vpiClockingIODecl: u32 = 651; +pub const vpiClassDefn: u32 = 652; +pub const vpiConstraint: u32 = 653; +pub const vpiConstraintOrdering: u32 = 654; +pub const vpiDistItem: u32 = 645; +pub const vpiAliasStmt: u32 = 646; +pub const vpiThread: u32 = 647; +pub const vpiMethodFuncCall: u32 = 648; +pub const vpiMethodTaskCall: u32 = 649; +pub const vpiAssert: u32 = 686; +pub const vpiAssume: u32 = 687; +pub const vpiCover: u32 = 688; +pub const vpiRestrict: u32 = 901; +pub const vpiDisableCondition: u32 = 689; +pub const vpiClockingEvent: u32 = 690; +pub const vpiPropertyDecl: u32 = 655; +pub const vpiPropertySpec: u32 = 656; +pub const vpiPropertyExpr: u32 = 657; +pub const vpiMulticlockSequenceExpr: u32 = 658; +pub const vpiClockedSeq: u32 = 659; +pub const vpiClockedProp: u32 = 902; +pub const vpiPropertyInst: u32 = 660; +pub const vpiSequenceDecl: u32 = 661; +pub const vpiCaseProperty: u32 = 662; +pub const vpiCasePropertyItem: u32 = 905; +pub const vpiSequenceInst: u32 = 664; +pub const vpiImmediateAssert: u32 = 665; +pub const vpiImmediateAssume: u32 = 694; +pub const vpiImmediateCover: u32 = 695; +pub const vpiReturn: u32 = 666; +pub const vpiAnyPattern: u32 = 667; +pub const vpiTaggedPattern: u32 = 668; +pub const vpiStructPattern: 
u32 = 669; +pub const vpiDoWhile: u32 = 670; +pub const vpiOrderedWait: u32 = 671; +pub const vpiWaitFork: u32 = 672; +pub const vpiDisableFork: u32 = 673; +pub const vpiExpectStmt: u32 = 674; +pub const vpiForeachStmt: u32 = 675; +pub const vpiReturnStmt: u32 = 691; +pub const vpiFinal: u32 = 676; +pub const vpiExtends: u32 = 677; +pub const vpiDistribution: u32 = 678; +pub const vpiSeqFormalDecl: u32 = 679; +pub const vpiPropFormalDecl: u32 = 699; +pub const vpiArrayNet: u32 = 114; +pub const vpiEnumNet: u32 = 680; +pub const vpiIntegerNet: u32 = 681; +pub const vpiLogicNet: u32 = 36; +pub const vpiTimeNet: u32 = 682; +pub const vpiUnionNet: u32 = 525; +pub const vpiShortRealNet: u32 = 526; +pub const vpiRealNet: u32 = 527; +pub const vpiByteNet: u32 = 528; +pub const vpiShortIntNet: u32 = 529; +pub const vpiIntNet: u32 = 530; +pub const vpiLongIntNet: u32 = 531; +pub const vpiBitNet: u32 = 532; +pub const vpiInterconnectNet: u32 = 533; +pub const vpiInterconnectArray: u32 = 534; +pub const vpiStructNet: u32 = 683; +pub const vpiBreak: u32 = 684; +pub const vpiContinue: u32 = 685; +pub const vpiPackedArrayNet: u32 = 693; +pub const vpiNettypeDecl: u32 = 523; +pub const vpiConstraintExpr: u32 = 747; +pub const vpiElseConst: u32 = 748; +pub const vpiImplication: u32 = 749; +pub const vpiConstrIf: u32 = 738; +pub const vpiConstrIfElse: u32 = 739; +pub const vpiConstrForEach: u32 = 736; +pub const vpiSoftDisable: u32 = 733; +pub const vpiLetDecl: u32 = 903; +pub const vpiLetExpr: u32 = 904; +pub const vpiActual: u32 = 700; +pub const vpiTypedefAlias: u32 = 701; +pub const vpiIndexTypespec: u32 = 702; +pub const vpiBaseTypespec: u32 = 703; +pub const vpiElemTypespec: u32 = 704; +pub const vpiNetTypedefAlias: u32 = 705; +pub const vpiInputSkew: u32 = 706; +pub const vpiOutputSkew: u32 = 707; +pub const vpiGlobalClocking: u32 = 708; +pub const vpiDefaultClocking: u32 = 709; +pub const vpiDefaultDisableIff: u32 = 710; +pub const vpiOrigin: u32 = 713; +pub const 
vpiPrefix: u32 = 714; +pub const vpiWith: u32 = 715; +pub const vpiProperty: u32 = 718; +pub const vpiValueRange: u32 = 720; +pub const vpiPattern: u32 = 721; +pub const vpiWeight: u32 = 722; +pub const vpiConstraintItem: u32 = 746; +pub const vpiTypedef: u32 = 725; +pub const vpiImport: u32 = 726; +pub const vpiDerivedClasses: u32 = 727; +pub const vpiInterfaceDecl: u32 = 728; +pub const vpiMethods: u32 = 730; +pub const vpiSolveBefore: u32 = 731; +pub const vpiSolveAfter: u32 = 732; +pub const vpiWaitingProcesses: u32 = 734; +pub const vpiMessages: u32 = 735; +pub const vpiLoopVars: u32 = 737; +pub const vpiConcurrentAssertion: u32 = 740; +pub const vpiConcurrentAssertions: u32 = 740; +pub const vpiMatchItem: u32 = 741; +pub const vpiMember: u32 = 742; +pub const vpiElement: u32 = 743; +pub const vpiAssertion: u32 = 744; +pub const vpiInstance: u32 = 745; +pub const vpiTop: u32 = 600; +pub const vpiUnit: u32 = 602; +pub const vpiJoinType: u32 = 603; +pub const vpiJoin: u32 = 0; +pub const vpiJoinNone: u32 = 1; +pub const vpiJoinAny: u32 = 2; +pub const vpiAccessType: u32 = 604; +pub const vpiForkJoinAcc: u32 = 1; +pub const vpiExternAcc: u32 = 2; +pub const vpiDPIExportAcc: u32 = 3; +pub const vpiDPIImportAcc: u32 = 4; +pub const vpiArrayType: u32 = 606; +pub const vpiStaticArray: u32 = 1; +pub const vpiDynamicArray: u32 = 2; +pub const vpiAssocArray: u32 = 3; +pub const vpiQueueArray: u32 = 4; +pub const vpiArrayMember: u32 = 607; +pub const vpiIsRandomized: u32 = 608; +pub const vpiLocalVarDecls: u32 = 609; +pub const vpiOpStrong: u32 = 656; +pub const vpiRandType: u32 = 610; +pub const vpiNotRand: u32 = 1; +pub const vpiRand: u32 = 2; +pub const vpiRandC: u32 = 3; +pub const vpiPortType: u32 = 611; +pub const vpiInterfacePort: u32 = 1; +pub const vpiModportPort: u32 = 2; +pub const vpiConstantVariable: u32 = 612; +pub const vpiStructUnionMember: u32 = 615; +pub const vpiVisibility: u32 = 620; +pub const vpiPublicVis: u32 = 1; +pub const vpiProtectedVis: u32 = 
2; +pub const vpiLocalVis: u32 = 3; +pub const vpiOneStepConst: u32 = 9; +pub const vpiUnboundedConst: u32 = 10; +pub const vpiNullConst: u32 = 11; +pub const vpiAlwaysType: u32 = 624; +pub const vpiAlwaysComb: u32 = 2; +pub const vpiAlwaysFF: u32 = 3; +pub const vpiAlwaysLatch: u32 = 4; +pub const vpiDistType: u32 = 625; +pub const vpiEqualDist: u32 = 1; +pub const vpiDivDist: u32 = 2; +pub const vpiPacked: u32 = 630; +pub const vpiTagged: u32 = 632; +pub const vpiRef: u32 = 6; +pub const vpiVirtual: u32 = 635; +pub const vpiHasActual: u32 = 636; +pub const vpiIsConstraintEnabled: u32 = 638; +pub const vpiSoft: u32 = 639; +pub const vpiClassType: u32 = 640; +pub const vpiMailboxClass: u32 = 1; +pub const vpiSemaphoreClass: u32 = 2; +pub const vpiUserDefinedClass: u32 = 3; +pub const vpiProcessClass: u32 = 4; +pub const vpiMethod: u32 = 645; +pub const vpiIsClockInferred: u32 = 649; +pub const vpiIsDeferred: u32 = 657; +pub const vpiIsFinal: u32 = 670; +pub const vpiIsCoverSequence: u32 = 659; +pub const vpiQualifier: u32 = 650; +pub const vpiNoQualifier: u32 = 0; +pub const vpiUniqueQualifier: u32 = 1; +pub const vpiPriorityQualifier: u32 = 2; +pub const vpiTaggedQualifier: u32 = 4; +pub const vpiRandQualifier: u32 = 8; +pub const vpiInsideQualifier: u32 = 16; +pub const vpiInputEdge: u32 = 651; +pub const vpiOutputEdge: u32 = 652; +pub const vpiGeneric: u32 = 653; +pub const vpiCompatibilityMode: u32 = 654; +pub const vpiMode1364v1995: u32 = 1; +pub const vpiMode1364v2001: u32 = 2; +pub const vpiMode1364v2005: u32 = 3; +pub const vpiMode1800v2005: u32 = 4; +pub const vpiMode1800v2009: u32 = 5; +pub const vpiPackedArrayMember: u32 = 655; +pub const vpiStartLine: u32 = 661; +pub const vpiColumn: u32 = 662; +pub const vpiEndLine: u32 = 663; +pub const vpiEndColumn: u32 = 664; +pub const vpiAllocScheme: u32 = 658; +pub const vpiAutomaticScheme: u32 = 1; +pub const vpiDynamicScheme: u32 = 2; +pub const vpiOtherScheme: u32 = 3; +pub const vpiObjId: u32 = 660; +pub 
const vpiDPIPure: u32 = 665; +pub const vpiDPIContext: u32 = 666; +pub const vpiDPICStr: u32 = 667; +pub const vpiDPI: u32 = 1; +pub const vpiDPIC: u32 = 2; +pub const vpiDPICIdentifier: u32 = 668; +pub const vpiIsModPort: u32 = 669; +pub const vpiImplyOp: u32 = 50; +pub const vpiNonOverlapImplyOp: u32 = 51; +pub const vpiOverlapImplyOp: u32 = 52; +pub const vpiAcceptOnOp: u32 = 83; +pub const vpiRejectOnOp: u32 = 84; +pub const vpiSyncAcceptOnOp: u32 = 85; +pub const vpiSyncRejectOnOp: u32 = 86; +pub const vpiOverlapFollowedByOp: u32 = 87; +pub const vpiNonOverlapFollowedByOp: u32 = 88; +pub const vpiNexttimeOp: u32 = 89; +pub const vpiAlwaysOp: u32 = 90; +pub const vpiEventuallyOp: u32 = 91; +pub const vpiUntilOp: u32 = 92; +pub const vpiUntilWithOp: u32 = 93; +pub const vpiUnaryCycleDelayOp: u32 = 53; +pub const vpiCycleDelayOp: u32 = 54; +pub const vpiIntersectOp: u32 = 55; +pub const vpiFirstMatchOp: u32 = 56; +pub const vpiThroughoutOp: u32 = 57; +pub const vpiWithinOp: u32 = 58; +pub const vpiRepeatOp: u32 = 59; +pub const vpiConsecutiveRepeatOp: u32 = 60; +pub const vpiGotoRepeatOp: u32 = 61; +pub const vpiPostIncOp: u32 = 62; +pub const vpiPreIncOp: u32 = 63; +pub const vpiPostDecOp: u32 = 64; +pub const vpiPreDecOp: u32 = 65; +pub const vpiMatchOp: u32 = 66; +pub const vpiCastOp: u32 = 67; +pub const vpiIffOp: u32 = 68; +pub const vpiWildEqOp: u32 = 69; +pub const vpiWildNeqOp: u32 = 70; +pub const vpiStreamLROp: u32 = 71; +pub const vpiStreamRLOp: u32 = 72; +pub const vpiMatchedOp: u32 = 73; +pub const vpiTriggeredOp: u32 = 74; +pub const vpiAssignmentPatternOp: u32 = 75; +pub const vpiMultiAssignmentPatternOp: u32 = 76; +pub const vpiIfOp: u32 = 77; +pub const vpiIfElseOp: u32 = 78; +pub const vpiCompAndOp: u32 = 79; +pub const vpiCompOrOp: u32 = 80; +pub const vpiImpliesOp: u32 = 94; +pub const vpiInsideOp: u32 = 95; +pub const vpiTypeOp: u32 = 81; +pub const vpiAssignmentOp: u32 = 82; +pub const vpiOtherFunc: u32 = 6; +pub const vpiValidUnknown: u32 = 
2; +pub const vpiCoverageStart: u32 = 750; +pub const vpiCoverageStop: u32 = 751; +pub const vpiCoverageReset: u32 = 752; +pub const vpiCoverageCheck: u32 = 753; +pub const vpiCoverageMerge: u32 = 754; +pub const vpiCoverageSave: u32 = 755; +pub const vpiAssertCoverage: u32 = 760; +pub const vpiFsmStateCoverage: u32 = 761; +pub const vpiStatementCoverage: u32 = 762; +pub const vpiToggleCoverage: u32 = 763; +pub const vpiCovered: u32 = 765; +pub const vpiCoverMax: u32 = 766; +pub const vpiCoveredMax: u32 = 766; +pub const vpiCoveredCount: u32 = 767; +pub const vpiAssertAttemptCovered: u32 = 770; +pub const vpiAssertSuccessCovered: u32 = 771; +pub const vpiAssertFailureCovered: u32 = 772; +pub const vpiAssertVacuousSuccessCovered: u32 = 773; +pub const vpiAssertDisableCovered: u32 = 774; +pub const vpiAssertKillCovered: u32 = 777; +pub const vpiFsmStates: u32 = 775; +pub const vpiFsmStateExpression: u32 = 776; +pub const vpiFsm: u32 = 758; +pub const vpiFsmHandle: u32 = 759; +pub const vpiAssertionLock: u32 = 645; +pub const vpiAssertionUnlock: u32 = 646; +pub const vpiAssertionDisable: u32 = 620; +pub const vpiAssertionEnable: u32 = 621; +pub const vpiAssertionReset: u32 = 622; +pub const vpiAssertionKill: u32 = 623; +pub const vpiAssertionEnableStep: u32 = 624; +pub const vpiAssertionDisableStep: u32 = 625; +pub const vpiAssertionClockSteps: u32 = 626; +pub const vpiAssertionSysLock: u32 = 647; +pub const vpiAssertionSysUnlock: u32 = 648; +pub const vpiAssertionSysOn: u32 = 627; +pub const vpiAssertionSysOff: u32 = 628; +pub const vpiAssertionSysKill: u32 = 632; +pub const vpiAssertionSysEnd: u32 = 629; +pub const vpiAssertionSysReset: u32 = 630; +pub const vpiAssertionDisablePassAction: u32 = 633; +pub const vpiAssertionEnablePassAction: u32 = 634; +pub const vpiAssertionDisableFailAction: u32 = 635; +pub const vpiAssertionEnableFailAction: u32 = 636; +pub const vpiAssertionDisableVacuousAction: u32 = 637; +pub const vpiAssertionEnableNonvacuousAction: u32 = 638; 
+pub const vpiAssertionSysEnablePassAction: u32 = 639; +pub const vpiAssertionSysEnableFailAction: u32 = 640; +pub const vpiAssertionSysDisablePassAction: u32 = 641; +pub const vpiAssertionSysDisableFailAction: u32 = 642; +pub const vpiAssertionSysEnableNonvacuousAction: u32 = 643; +pub const vpiAssertionSysDisableVacuousAction: u32 = 644; +pub type va_list = __builtin_va_list; +pub type PLI_INT64 = i64; +pub type PLI_INT32 = ::std::os::raw::c_int; +pub type PLI_UINT32 = ::std::os::raw::c_uint; +pub type PLI_INT16 = ::std::os::raw::c_short; +pub type PLI_BYTE8 = ::std::os::raw::c_char; +#[doc = " TYPEDEFS"] +pub type vpiHandle = *mut PLI_UINT32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_time { + pub type_: PLI_INT32, + pub high: PLI_UINT32, + pub low: PLI_UINT32, + pub real: f64, +} +#[test] +fn bindgen_test_layout_t_vpi_time() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(t_vpi_time)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_time)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).high) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(high) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).low) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(low) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_time), + "::", + stringify!(real) + ) + ); +} +pub type s_vpi_time = t_vpi_time; +pub type 
p_vpi_time = *mut t_vpi_time; +#[doc = " delay structures"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_delay { + pub da: *mut t_vpi_time, + pub no_of_delays: PLI_INT32, + pub time_type: PLI_INT32, + pub mtm_flag: PLI_INT32, + pub append_flag: PLI_INT32, + pub pulsere_flag: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_delay() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(t_vpi_delay)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_delay)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).da) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(da) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).no_of_delays) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(no_of_delays) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time_type) as usize - ptr as usize }, + 12usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(time_type) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).mtm_flag) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(mtm_flag) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).append_flag) as usize - ptr as usize }, + 20usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(append_flag) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).pulsere_flag) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_delay), + "::", + stringify!(pulsere_flag) + ) + ); +} +#[doc = " delay structures"] +pub type p_vpi_delay = *mut t_vpi_delay; +#[repr(C)] 
+#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vecval { + pub aval: PLI_UINT32, + pub bval: PLI_UINT32, +} +#[test] +fn bindgen_test_layout_t_vpi_vecval() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_vecval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).aval) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(aval) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).bval) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vecval), + "::", + stringify!(bval) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_strengthval { + pub logic: PLI_INT32, + pub s0: PLI_INT32, + pub s1: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_strengthval() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(t_vpi_strengthval)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(t_vpi_strengthval)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).logic) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(logic) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).s0) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(s0) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).s1) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_strengthval), + "::", + stringify!(s1) + ) + ); +} 
+#[repr(C)] +#[derive(Copy, Clone)] +pub struct t_vpi_value { + pub format: PLI_INT32, + pub value: t_vpi_value__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union t_vpi_value__bindgen_ty_1 { + pub str_: *mut PLI_BYTE8, + pub scalar: PLI_INT32, + pub integer: PLI_INT32, + pub real: f64, + pub time: *mut t_vpi_time, + pub vector: *mut t_vpi_vecval, + pub strength: *mut t_vpi_strengthval, + pub misc: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_value__bindgen_ty_1() { + const UNINIT: ::std::mem::MaybeUninit = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_value__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_value__bindgen_ty_1)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).str_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(str_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).scalar) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(scalar) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).integer) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(integer) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).real) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(real) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vector) as usize - ptr as usize }, + 0usize, + concat!( 
+ "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(vector) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).strength) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(strength) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).misc) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value__bindgen_ty_1), + "::", + stringify!(misc) + ) + ); +} +#[test] +fn bindgen_test_layout_t_vpi_value() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(t_vpi_value)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_value)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).format) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value), + "::", + stringify!(format) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_value), + "::", + stringify!(value) + ) + ); +} +pub type p_vpi_value = *mut t_vpi_value; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct t_vpi_arrayvalue { + pub format: PLI_UINT32, + pub flags: PLI_UINT32, + pub value: t_vpi_arrayvalue__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union t_vpi_arrayvalue__bindgen_ty_1 { + pub integers: *mut PLI_INT32, + pub shortints: *mut PLI_INT16, + pub longints: *mut PLI_INT64, + pub rawvals: *mut PLI_BYTE8, + pub vectors: *mut t_vpi_vecval, + pub times: *mut t_vpi_time, + pub reals: *mut f64, + pub shortreals: *mut f32, +} +#[test] +fn bindgen_test_layout_t_vpi_arrayvalue__bindgen_ty_1() { + const UNINIT: ::std::mem::MaybeUninit = + ::std::mem::MaybeUninit::uninit(); + let ptr = 
UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_arrayvalue__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_arrayvalue__bindgen_ty_1)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).integers) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(integers) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).shortints) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(shortints) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).longints) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(longints) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).rawvals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(rawvals) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).vectors) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(vectors) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).times) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(times) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + stringify!(reals) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).shortreals) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue__bindgen_ty_1), + "::", + 
stringify!(shortreals) + ) + ); +} +#[test] +fn bindgen_test_layout_t_vpi_arrayvalue() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(t_vpi_arrayvalue)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_arrayvalue)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).format) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(format) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_arrayvalue), + "::", + stringify!(value) + ) + ); +} +pub type p_vpi_arrayvalue = *mut t_vpi_arrayvalue; +#[doc = " system task/function structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_systf_data { + pub type_: PLI_INT32, + pub sysfunctype: PLI_INT32, + pub tfname: *mut PLI_BYTE8, + pub calltf: ::std::option::Option PLI_INT32>, + pub compiletf: ::std::option::Option PLI_INT32>, + pub sizetf: ::std::option::Option PLI_INT32>, + pub user_data: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_systf_data() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size of: ", stringify!(t_vpi_systf_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_systf_data)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + 
stringify!(t_vpi_systf_data), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).sysfunctype) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(sysfunctype) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).tfname) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(tfname) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).calltf) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(calltf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).compiletf) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(compiletf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).sizetf) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(sizetf) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).user_data) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_systf_data), + "::", + stringify!(user_data) + ) + ); +} +#[doc = " system task/function structure"] +pub type p_vpi_systf_data = *mut t_vpi_systf_data; +#[doc = " SystemVerilog execution information structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_vlog_info { + pub argc: PLI_INT32, + pub argv: *mut *mut PLI_BYTE8, + pub product: *mut PLI_BYTE8, + pub version: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_vpi_vlog_info() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(t_vpi_vlog_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment 
of ", stringify!(t_vpi_vlog_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).argc) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(argc) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).argv) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(argv) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).product) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(product) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_vlog_info), + "::", + stringify!(version) + ) + ); +} +#[doc = " SystemVerilog execution information structure"] +pub type p_vpi_vlog_info = *mut t_vpi_vlog_info; +#[doc = " PLI error information structure"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_error_info { + pub state: PLI_INT32, + pub level: PLI_INT32, + pub message: *mut PLI_BYTE8, + pub product: *mut PLI_BYTE8, + pub code: *mut PLI_BYTE8, + pub file: *mut PLI_BYTE8, + pub line: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_error_info() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size of: ", stringify!(t_vpi_error_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_error_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(state) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).level) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + 
stringify!(t_vpi_error_info), + "::", + stringify!(level) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).message) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(message) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).product) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(product) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).code) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(code) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).file) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(file) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).line) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_error_info), + "::", + stringify!(line) + ) + ); +} +#[doc = " PLI error information structure"] +pub type p_vpi_error_info = *mut t_vpi_error_info; +#[doc = " callback structures"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_cb_data { + pub reason: PLI_INT32, + pub cb_rtn: ::std::option::Option PLI_INT32>, + pub obj: vpiHandle, + pub time: p_vpi_time, + pub value: p_vpi_value, + pub index: PLI_INT32, + pub user_data: *mut PLI_BYTE8, +} +#[test] +fn bindgen_test_layout_t_cb_data() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(t_cb_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_cb_data)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reason) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + 
"::", + stringify!(reason) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).cb_rtn) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(cb_rtn) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).obj) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(obj) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).time) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).value) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(value) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).index) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(index) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).user_data) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(t_cb_data), + "::", + stringify!(user_data) + ) + ); +} +#[doc = " callback structures"] +pub type p_cb_data = *mut t_cb_data; +extern "C" { + pub fn vpi_register_cb(cb_data_p: p_cb_data) -> vpiHandle; +} +extern "C" { + pub fn vpi_remove_cb(cb_obj: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_cb_info(object: vpiHandle, cb_data_p: p_cb_data); +} +extern "C" { + pub fn vpi_register_systf(systf_data_p: p_vpi_systf_data) -> vpiHandle; +} +extern "C" { + pub fn vpi_get_systf_info(object: vpiHandle, systf_data_p: p_vpi_systf_data); +} +extern "C" { + pub fn vpi_handle_by_name(name: *mut PLI_BYTE8, scope: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_handle_by_index(object: vpiHandle, indx: PLI_INT32) -> vpiHandle; +} +extern "C" { + pub fn vpi_handle(type_: PLI_INT32, refHandle: vpiHandle) -> vpiHandle; +} +extern "C" { 
+ pub fn vpi_handle_multi( + type_: PLI_INT32, + refHandle1: vpiHandle, + refHandle2: vpiHandle, + ... + ) -> vpiHandle; +} +extern "C" { + pub fn vpi_iterate(type_: PLI_INT32, refHandle: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_scan(iterator: vpiHandle) -> vpiHandle; +} +extern "C" { + pub fn vpi_get(property: PLI_INT32, object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get64(property: PLI_INT32, object: vpiHandle) -> PLI_INT64; +} +extern "C" { + pub fn vpi_get_str(property: PLI_INT32, object: vpiHandle) -> *mut PLI_BYTE8; +} +extern "C" { + pub fn vpi_get_delays(object: vpiHandle, delay_p: p_vpi_delay); +} +extern "C" { + pub fn vpi_put_delays(object: vpiHandle, delay_p: p_vpi_delay); +} +extern "C" { + pub fn vpi_get_value(expr: vpiHandle, value_p: p_vpi_value); +} +extern "C" { + pub fn vpi_put_value( + object: vpiHandle, + value_p: p_vpi_value, + time_p: p_vpi_time, + flags: PLI_INT32, + ) -> vpiHandle; +} +extern "C" { + pub fn vpi_get_value_array( + object: vpiHandle, + arrayvalue_p: p_vpi_arrayvalue, + index_p: *mut PLI_INT32, + num: PLI_UINT32, + ); +} +extern "C" { + pub fn vpi_put_value_array( + object: vpiHandle, + arrayvalue_p: p_vpi_arrayvalue, + index_p: *mut PLI_INT32, + num: PLI_UINT32, + ); +} +extern "C" { + pub fn vpi_get_time(object: vpiHandle, time_p: p_vpi_time); +} +extern "C" { + pub fn vpi_mcd_open(fileName: *mut PLI_BYTE8) -> PLI_UINT32; +} +extern "C" { + pub fn vpi_mcd_close(mcd: PLI_UINT32) -> PLI_UINT32; +} +extern "C" { + pub fn vpi_mcd_name(cd: PLI_UINT32) -> *mut PLI_BYTE8; +} +extern "C" { + pub fn vpi_mcd_printf(mcd: PLI_UINT32, format: *mut PLI_BYTE8, ...) -> PLI_INT32; +} +extern "C" { + pub fn vpi_printf(format: *mut PLI_BYTE8, ...) 
-> PLI_INT32; +} +extern "C" { + pub fn vpi_compare_objects(object1: vpiHandle, object2: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_chk_error(error_info_p: p_vpi_error_info) -> PLI_INT32; +} +extern "C" { + pub fn vpi_free_object(object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_release_handle(object: vpiHandle) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_vlog_info(vlog_info_p: p_vpi_vlog_info) -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_data(id: PLI_INT32, dataLoc: *mut PLI_BYTE8, numOfBytes: PLI_INT32) + -> PLI_INT32; +} +extern "C" { + pub fn vpi_put_data(id: PLI_INT32, dataLoc: *mut PLI_BYTE8, numOfBytes: PLI_INT32) + -> PLI_INT32; +} +extern "C" { + pub fn vpi_get_userdata(obj: vpiHandle) -> *mut ::std::os::raw::c_void; +} +extern "C" { + pub fn vpi_put_userdata(obj: vpiHandle, userdata: *mut ::std::os::raw::c_void) -> PLI_INT32; +} +extern "C" { + pub fn vpi_vprintf(format: *mut PLI_BYTE8, ap: *mut __va_list_tag) -> PLI_INT32; +} +extern "C" { + pub fn vpi_mcd_vprintf( + mcd: PLI_UINT32, + format: *mut PLI_BYTE8, + ap: *mut __va_list_tag, + ) -> PLI_INT32; +} +extern "C" { + pub fn vpi_flush() -> PLI_INT32; +} +extern "C" { + pub fn vpi_mcd_flush(mcd: PLI_UINT32) -> PLI_INT32; +} +extern "C" { + pub fn vpi_control(operation: PLI_INT32, ...) 
-> PLI_INT32; +} +extern "C" { + pub fn vpi_handle_by_multi_index( + obj: vpiHandle, + num_index: PLI_INT32, + index_array: *mut PLI_INT32, + ) -> vpiHandle; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct t_vpi_assertion_step_info { + pub matched_expression_count: PLI_INT32, + pub matched_exprs: *mut vpiHandle, + pub stateFrom: PLI_INT32, + pub stateTo: PLI_INT32, +} +#[test] +fn bindgen_test_layout_t_vpi_assertion_step_info() { + const UNINIT: ::std::mem::MaybeUninit = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(t_vpi_assertion_step_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_assertion_step_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).matched_expression_count) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_assertion_step_info), + "::", + stringify!(matched_expression_count) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).matched_exprs) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_assertion_step_info), + "::", + stringify!(matched_exprs) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).stateFrom) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_assertion_step_info), + "::", + stringify!(stateFrom) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).stateTo) as usize - ptr as usize }, + 20usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_assertion_step_info), + "::", + stringify!(stateTo) + ) + ); +} +pub type p_vpi_assertion_step_info = *mut t_vpi_assertion_step_info; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct t_vpi_attempt_info { + pub detail: t_vpi_attempt_info__bindgen_ty_1, + pub attemptStartTime: s_vpi_time, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union 
t_vpi_attempt_info__bindgen_ty_1 { + pub failExpr: vpiHandle, + pub step: p_vpi_assertion_step_info, +} +#[test] +fn bindgen_test_layout_t_vpi_attempt_info__bindgen_ty_1() { + const UNINIT: ::std::mem::MaybeUninit = + ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(t_vpi_attempt_info__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!( + "Alignment of ", + stringify!(t_vpi_attempt_info__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).failExpr) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_attempt_info__bindgen_ty_1), + "::", + stringify!(failExpr) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).step) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_attempt_info__bindgen_ty_1), + "::", + stringify!(step) + ) + ); +} +#[test] +fn bindgen_test_layout_t_vpi_attempt_info() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(t_vpi_attempt_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(t_vpi_attempt_info)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).detail) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_attempt_info), + "::", + stringify!(detail) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).attemptStartTime) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(t_vpi_attempt_info), + "::", + stringify!(attemptStartTime) + ) + ); +} +pub type p_vpi_attempt_info = *mut t_vpi_attempt_info; +pub type vpi_assertion_callback_func = ::std::option::Option< + unsafe extern "C" fn( + reason: PLI_INT32, + cb_time: p_vpi_time, + 
assertion: vpiHandle, + info: p_vpi_attempt_info, + user_data: *mut PLI_BYTE8, + ) -> PLI_INT32, +>; +extern "C" { + pub fn vpi_register_assertion_cb( + assertion: vpiHandle, + reason: PLI_INT32, + cb_rtn: vpi_assertion_callback_func, + user_data: *mut PLI_BYTE8, + ) -> vpiHandle; +} +pub type __builtin_va_list = [__va_list_tag; 1usize]; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __va_list_tag { + pub gp_offset: ::std::os::raw::c_uint, + pub fp_offset: ::std::os::raw::c_uint, + pub overflow_arg_area: *mut ::std::os::raw::c_void, + pub reg_save_area: *mut ::std::os::raw::c_void, +} +#[test] +fn bindgen_test_layout___va_list_tag() { + const UNINIT: ::std::mem::MaybeUninit<__va_list_tag> = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::<__va_list_tag>(), + 24usize, + concat!("Size of: ", stringify!(__va_list_tag)) + ); + assert_eq!( + ::std::mem::align_of::<__va_list_tag>(), + 8usize, + concat!("Alignment of ", stringify!(__va_list_tag)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).gp_offset) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(gp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).fp_offset) as usize - ptr as usize }, + 4usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(fp_offset) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).overflow_arg_area) as usize - ptr as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(overflow_arg_area) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).reg_save_area) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(__va_list_tag), + "::", + stringify!(reg_save_area) + ) + ); +} diff --git a/t1rocketemu/online_drive/Cargo.toml b/t1rocketemu/online_drive/Cargo.toml new file mode 100644 index 
000000000..929a8b546 --- /dev/null +++ b/t1rocketemu/online_drive/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "online_drive" +version = "0.1.0" +edition = "2021" + +[dependencies] +online_dpi = { path = "../online_dpi", features = ["sv2023"] } + +[build-dependencies] +cmake = "0.1.50" + +[features] +trace = ["online_dpi/trace"] diff --git a/t1rocketemu/online_drive/build.rs b/t1rocketemu/online_drive/build.rs new file mode 100644 index 000000000..fe883aaa8 --- /dev/null +++ b/t1rocketemu/online_drive/build.rs @@ -0,0 +1,21 @@ +use cmake::Config; + +fn main() { + #[cfg(feature = "trace")] + let dst = + Config::new("verilator_shim").define("VM_TRACE", "1").very_verbose(true).always_configure(true).build(); + #[cfg(not(feature = "trace"))] + let dst = Config::new("verilator_shim").very_verbose(true).always_configure(true).build(); + + println!("cargo::rustc-link-search=native={}/lib", dst.display()); + + // link order matters! so we use +whole-archive here + // verilator_main <- VTestBench <-- verilated <- verilator_shim <- stdc++ + // verilated <- libz + println!("cargo::rustc-link-lib=static:+whole-archive=verilator_shim"); + println!("cargo::rustc-link-lib=static:+whole-archive=VTestBench"); + println!("cargo::rustc-link-lib=static:+whole-archive=verilated"); + println!("cargo::rustc-link-lib=stdc++"); + println!("cargo::rustc-link-lib=z"); + println!("cargo::rerun-if-env-changed=VERILATED_LIB_DIR"); +} diff --git a/t1rocketemu/online_drive/src/main.rs b/t1rocketemu/online_drive/src/main.rs new file mode 100644 index 000000000..3a6a2aa13 --- /dev/null +++ b/t1rocketemu/online_drive/src/main.rs @@ -0,0 +1,31 @@ +// force link with online_dpi +extern crate online_dpi; + +use std::{ + ffi::{c_char, c_int, CString}, + ptr, +}; + +fn main() { + let c_args: Vec = std::env::args().map(|arg| CString::new(arg).unwrap()).collect(); + + let mut c_args_ptr: Vec<*const c_char> = c_args.iter().map(|arg| arg.as_ptr()).collect(); + c_args_ptr.push(ptr::null()); + + let argc = 
c_args.len() as c_int; + let argv = c_args_ptr.as_ptr() as *mut *mut c_char; + + unsafe { + verilator_main_c(argc, argv); + } + + std::fs::write( + "perf.txt", + format!("total_cycles: {}", online_dpi::get_t()), + ) + .expect("fail to write into perf.txt"); +} + +extern "C" { + fn verilator_main_c(argc: c_int, argv: *mut *mut c_char) -> c_int; +} diff --git a/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt b/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt new file mode 100644 index 000000000..e7aefb74f --- /dev/null +++ b/t1rocketemu/online_drive/verilator_shim/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.20) +project(verilator_shim) +set(CMAKE_CXX_STANDARD 17) + +message(STATUS "Project '${PROJECT_NAME}' build type: ${CMAKE_BUILD_TYPE}") + +set(THREADS_PREFER_PTHREAD_FLAG ON) + +add_library(verilator_shim + STATIC + verilator_shim.cc +) + +if (NOT DEFINED VERILATED_LIB_DIR) + set(VERILATED_LIB_DIR "$ENV{VERILATED_LIB_DIR}") + if (VERILATED_LIB_DIR STREQUAL "") + message(FATAL_ERROR "You should specify verilated libs via -DVERILATE_LIB_DIR or environment variable VERILATED_LIB_DIR, but it seems not") + endif() +endif() + +if (NOT DEFINED VERILATED_INC_DIR) + set(VERILATED_INC_DIR "$ENV{VERILATED_INC_DIR}") + if (VERILATED_INC_DIR STREQUAL "") + message(FATAL_ERROR "You should specify verilated libs via -DVERILATED_INC_DIR or environment variable VERILATED_INC_DIR, but it seems not") + endif() +endif() + +# include verilator headers +find_package(verilator REQUIRED) +message(STATUS "Found verilator: ${verilator_DIR}") +target_include_directories(verilator_shim PUBLIC ${verilator_DIR}/include) +target_include_directories(verilator_shim PUBLIC ${verilator_DIR}/include/vltstd) + +if(DEFINED VM_TRACE) + target_compile_definitions(verilator_shim PRIVATE VM_TRACE=1) +endif() + +install(TARGETS verilator_shim ARCHIVE) diff --git a/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc 
b/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc new file mode 100644 index 000000000..1c0a479d7 --- /dev/null +++ b/t1rocketemu/online_drive/verilator_shim/verilator_shim.cc @@ -0,0 +1,40 @@ +#include +#include + +class VTestBench; + +extern "C" int verilator_main_c(int argc, char **argv) { + // Setup context, defaults, and parse command line + Verilated::debug(0); + VerilatedContext* contextp = new VerilatedContext(); + contextp->fatalOnError(false); + contextp->commandArgs(argc, argv); +#ifdef VM_TRACE + contextp->traceEverOn(true); +#endif + + // Construct the Verilated model, from Vtop.h generated from Verilating + VTestBench* topp = new VTestBench(contextp); + + // Simulate until $finish + while (!contextp->gotFinish()) { + // Evaluate model + topp->eval(); + // Advance time + if (!topp->eventsPending()) + break; + contextp->time(topp->nextTimeSlot()); + } + + if (!contextp->gotFinish()) { + VL_DEBUG_IF(VL_PRINTF("+ Exiting without $finish; no events left\n");); + } + + // Final model cleanup + topp->final(); + + delete topp; + delete contextp; + + return 0; +} diff --git a/t1rocketemu/online_vcs/Cargo.toml b/t1rocketemu/online_vcs/Cargo.toml new file mode 100644 index 000000000..d85b2b690 --- /dev/null +++ b/t1rocketemu/online_vcs/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "online_vcs" +edition = "2021" +version.workspace = true + +[lib] +crate-type = ["staticlib"] +name = "dpi" + +[dependencies] +online_dpi = { path = "../online_dpi", features = ["svvpi"] } + +[features] +trace = ["online_dpi/trace"] diff --git a/t1rocketemu/online_vcs/default.nix b/t1rocketemu/online_vcs/default.nix new file mode 100644 index 000000000..b1400fe1f --- /dev/null +++ b/t1rocketemu/online_vcs/default.nix @@ -0,0 +1,48 @@ +{ lib +, elaborateConfig +, rustPlatform +, libspike +, libspike_interfaces +, enable-trace ? 
false +, vcStaticHome +}: + +rustPlatform.buildRustPackage { + name = "vcs-dpi-lib"; + src = with lib.fileset; toSource { + root = ../.; + fileset = unions [ + ../spike_rs + ../offline + ../online_dpi + ../online_drive + ../online_vcs + ../test_common + ../Cargo.lock + ../Cargo.toml + ]; + }; + + buildFeatures = lib.optionals enable-trace [ "trace" ]; + buildAndTestSubdir = "./online_vcs"; + + env = { + VCS_LIB_DIR = "${vcStaticHome}/vcs-mx/linux64/lib"; + SPIKE_LIB_DIR = "${libspike}/lib"; + SPIKE_INTERFACES_LIB_DIR = "${libspike_interfaces}/lib"; + DESIGN_VLEN = elaborateConfig.parameter.vLen; + DESIGN_DLEN = elaborateConfig.parameter.dLen; + SPIKE_ISA_STRING = + "rv32gc" + + (builtins.concatStringsSep "_" elaborateConfig.parameter.extensions) + + "_Zvl${toString elaborateConfig.parameter.vLen}b"; + }; + + cargoLock = { + lockFile = ../Cargo.lock; + }; + + passthru = { + inherit enable-trace; + }; +} diff --git a/t1rocketemu/online_vcs/src/lib.rs b/t1rocketemu/online_vcs/src/lib.rs new file mode 100644 index 000000000..be27f2116 --- /dev/null +++ b/t1rocketemu/online_vcs/src/lib.rs @@ -0,0 +1,2 @@ +// force link with online_dpi +extern crate online_dpi; diff --git a/t1rocketemu/readme.md b/t1rocketemu/readme.md new file mode 100644 index 000000000..dfd1c0380 --- /dev/null +++ b/t1rocketemu/readme.md @@ -0,0 +1,11 @@ +## Build + +```bash +nix build ".#t1..ip.difftest" +``` + +## Develop + +```bash +nix develop ".#t1..ip.difftest.devShell" +``` diff --git a/t1rocketemu/spike_interfaces/CMakeLists.txt b/t1rocketemu/spike_interfaces/CMakeLists.txt new file mode 100644 index 000000000..fe5272891 --- /dev/null +++ b/t1rocketemu/spike_interfaces/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.20) +project(spike_interfaces LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 17) + +find_package(libspike REQUIRED) + +add_library(${CMAKE_PROJECT_NAME} STATIC spike_interfaces.cc) + +target_link_libraries(${CMAKE_PROJECT_NAME} PUBLIC libspike) + 
+target_include_directories(${CMAKE_PROJECT_NAME} INTERFACE + $ + $ +) + +# just playing with CMake export, maybe not necessary +target_sources(${CMAKE_PROJECT_NAME} PUBLIC + FILE_SET HEADERS + FILES spike_interfaces.h spike_interfaces_c.h) + +install( + TARGETS ${CMAKE_PROJECT_NAME} + EXPORT ${CMAKE_PROJECT_NAME}-config + PUBLIC_HEADER + FILE_SET HEADERS +) + +install( + EXPORT ${CMAKE_PROJECT_NAME}-config + NAMESPACE ${CMAKE_PROJECT_NAME}:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME} +) diff --git a/t1rocketemu/spike_interfaces/default.nix b/t1rocketemu/spike_interfaces/default.nix new file mode 100644 index 000000000..d95618793 --- /dev/null +++ b/t1rocketemu/spike_interfaces/default.nix @@ -0,0 +1,11 @@ +{ lib, stdenv, cmake, libspike }: + +stdenv.mkDerivation { + name = "spike_interfaces"; + src = with lib.fileset; toSource { + root = ./.; + fileset = fileFilter (file: file.name != "default.nix") ./.; + }; + nativeBuildInputs = [ cmake ]; + propagatedBuildInputs = [ libspike ]; +} diff --git a/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake b/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake new file mode 100644 index 000000000..8a0867895 --- /dev/null +++ b/t1rocketemu/spike_interfaces/spike_interfaces-config.cmake @@ -0,0 +1,3 @@ +include(CMakeFindDependencyMacro) +find_dependency(libspike 0.1.0) +include(${CMAKE_CURRENT_LIST_DIR}/libspike_interface_targets.cmake) diff --git a/t1rocketemu/spike_interfaces/spike_interfaces.cc b/t1rocketemu/spike_interfaces/spike_interfaces.cc new file mode 100644 index 000000000..479ebe11b --- /dev/null +++ b/t1rocketemu/spike_interfaces/spike_interfaces.cc @@ -0,0 +1,252 @@ +#include + +#include "spike_interfaces.h" + +constexpr uint32_t CSR_MSIMEND = 0x7cc; + +void *ffi_target; + +cfg_t make_spike_cfg() { + cfg_t cfg; + cfg.initrd_bounds = std::make_pair((reg_t)0, (reg_t)0), + cfg.bootargs = nullptr; + cfg.isa = DEFAULT_ISA; + cfg.priv = DEFAULT_PRIV; + cfg.misaligned = false; + 
cfg.endianness = endianness_little; + cfg.pmpregions = 16; + cfg.pmpgranularity = 4; + cfg.mem_layout = std::vector(); + cfg.hartids = std::vector(); + cfg.explicit_hartids = false; + cfg.real_time_clint = false; + cfg.trigger_count = 4; + return cfg; +} + +Spike::Spike(const char *set, const char *lvl, + size_t lane_number) + : sim(), isa(set, lvl), cfg(make_spike_cfg()), + proc( + /*isa*/ &isa, + /*cfg*/ &cfg, + /*sim*/ &sim, + /*id*/ 0, + /*halt on reset*/ true, + /*log_file_t*/ nullptr, + /*sout*/ std::cerr) { + proc.VU.lane_num = lane_number; + proc.VU.lane_granularity = 32; + + auto &csrmap = proc.get_state()->csrmap; + csrmap[CSR_MSIMEND] = std::make_shared(&proc, CSR_MSIMEND, 1); + proc.enable_log_commits(); +} + +spike_t *spike_new(const char *set, const char *lvl, + size_t lane_number) { + return new spike_t{new Spike(set, lvl, lane_number)}; +} + +const char *proc_disassemble(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto mmu = proc->p->get_mmu(); + auto disasm = proc->p->get_disassembler(); + auto fetch = mmu->load_insn(pc); + return strdup(disasm->disassemble(fetch.insn).c_str()); +} + +spike_processor_t *spike_get_proc(spike_t *spike) { + return new spike_processor_t{spike->s->get_proc()}; +} + +void proc_reset(spike_processor_t *proc) { proc->p->reset(); } + +spike_state_t *proc_get_state(spike_processor_t *proc) { + return new spike_state_t{proc->p->get_state()}; +} + +reg_t proc_func(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto mmu = proc->p->get_mmu(); + auto fetch = mmu->load_insn(pc); + try { + return fetch.func(proc->p, fetch.insn, pc); + } catch (trap_t &trap) { + std::cerr << "Error: spike trapped with " << trap.name() + << " (tval=" << std::uppercase << std::setfill('0') + << std::setw(8) << std::hex << trap.get_tval() + << ", tval2=" << std::setw(8) << std::hex << trap.get_tval2() + << ", tinst=" << std::setw(8) << std::hex << trap.get_tinst() + << ")" << std::endl; + throw trap; + } +} + 
+reg_t proc_get_insn(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto mmu = proc->p->get_mmu(); + auto fetch = mmu->load_insn(pc); + return fetch.insn.bits(); +} + +uint8_t proc_get_vreg_data(spike_processor_t *proc, uint32_t vreg_idx, + uint32_t vreg_offset) { + return proc->p->VU.elt(vreg_idx, vreg_offset); +} + +uint32_t extract_f32(freg_t f) { return (uint32_t)f.v[0]; } + +inline uint32_t clip(uint32_t binary, int a, int b) { + int nbits = b - a + 1; + uint32_t mask = nbits >= 32 ? (uint32_t)-1 : (1 << nbits) - 1; + return (binary >> a) & mask; +} + +uint32_t proc_get_rs1(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto fetch = proc->p->get_mmu()->load_insn(pc); + return (uint32_t)fetch.insn.rs1(); +} + +uint32_t proc_get_rs2(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto fetch = proc->p->get_mmu()->load_insn(pc); + return (uint32_t)fetch.insn.rs2(); +} + +uint32_t proc_get_rd(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto fetch = proc->p->get_mmu()->load_insn(pc); + return fetch.insn.rd(); +} + +uint64_t proc_vu_get_vtype(spike_processor_t *proc) { + return proc->p->VU.vtype->read(); +} + +uint32_t proc_vu_get_vxrm(spike_processor_t *proc) { + return proc->p->VU.vxrm->read(); +} + +uint32_t proc_vu_get_vnf(spike_processor_t *proc) { + auto pc = proc->p->get_state()->pc; + auto fetch = proc->p->get_mmu()->load_insn(pc); + return fetch.insn.v_nf(); +} + +bool proc_vu_get_vill(spike_processor_t *proc) { return proc->p->VU.vill; } + +bool proc_vu_get_vxsat(spike_processor_t *proc) { + return proc->p->VU.vxsat->read(); +} + +uint32_t proc_vu_get_vl(spike_processor_t *proc) { + return proc->p->VU.vl->read(); +} + +uint16_t proc_vu_get_vstart(spike_processor_t *proc) { + return proc->p->VU.vstart->read(); +} + +reg_t state_get_pc(spike_state_t *state) { return state->s->pc; } + +void state_set_mcycle(spike_state_t *state, size_t mcycle) { + 
state->s->mcycle->write((int64_t)mcycle); +} + +void state_clear(spike_state_t *state) { + state->s->log_reg_write.clear(); + state->s->log_mem_read.clear(); + state->s->log_mem_write.clear(); +} + +static void state_set_serialized(spike_state_t *state, bool serialized) { + state->s->serialized = serialized; +} + +uint64_t state_handle_pc(spike_state_t *state, uint64_t new_pc) { + if ((new_pc & 1) == 0) { + state_set_pc(state, new_pc); + } else { + switch (new_pc) { + case PC_SERIALIZE_BEFORE: + state_set_serialized(state, true); + break; + case PC_SERIALIZE_AFTER: + break; + default: + return -1; + } + } + return 0; +} + +void state_set_pc(spike_state_t *state, uint64_t pc) { state->s->pc = pc; } + +uint32_t state_get_reg(spike_state_t *state, uint32_t index, bool is_fp) { + if (is_fp) { + auto &fr = state->s->FPR; + return extract_f32(fr[index]); + } + auto &xr = state->s->XPR; + return (uint32_t)xr[index]; +} + +uint32_t state_get_reg_write_size(spike_state_t *state) { + reg_write_index_vec.clear(); + for (auto [idx, data] : state->s->log_reg_write) { + reg_write_index_vec.push_back(idx); + } + return state->s->log_reg_write.size(); +} + +uint32_t state_get_reg_write_index(spike_state_t *state, uint32_t index) { + return reg_write_index_vec[index]; +} + +uint32_t state_get_mem_write_size(spike_state_t *state) { + return state->s->log_mem_write.size(); +} + +uint32_t state_get_mem_write_addr(spike_state_t *state, uint32_t index) { + return std::get<0>(state->s->log_mem_write[index]) & 0xffffffff; +} + +uint64_t state_get_mem_write_value(spike_state_t *state, uint32_t index) { + return std::get<1>(state->s->log_mem_write[index]); +} + +uint8_t state_get_mem_write_size_by_byte(spike_state_t *state, uint32_t index) { + return std::get<2>(state->s->log_mem_write[index]); +} + +uint32_t state_get_mem_read_size(spike_state_t *state) { + return state->s->log_mem_read.size(); +} + +uint32_t state_get_mem_read_addr(spike_state_t *state, uint32_t index) { + return 
std::get<0>(state->s->log_mem_read[index]) & 0xffffffff; +} + +uint8_t state_get_mem_read_size_by_byte(spike_state_t *state, uint32_t index) { + return std::get<2>(state->s->log_mem_read[index]); +} + +reg_t state_exit(spike_state_t *state) { + auto &csrmap = state->s->csrmap; + return csrmap[CSR_MSIMEND]->read(); +} + +void spike_register_callback(void *ffi_target_, ffi_callback callback) { + ffi_addr_to_mem = callback; + ffi_target = ffi_target_; + + return; +} + +void spike_destruct(spike_t *spike) { delete spike; } + +void proc_destruct(spike_processor_t *proc) { delete proc; } + +void state_destruct(spike_state_t *state) { delete state; } diff --git a/t1rocketemu/spike_interfaces/spike_interfaces.h b/t1rocketemu/spike_interfaces/spike_interfaces.h new file mode 100644 index 000000000..ceffa8f7f --- /dev/null +++ b/t1rocketemu/spike_interfaces/spike_interfaces.h @@ -0,0 +1,76 @@ +#ifndef __SPIKE_INTERFCES_H__ +#define __SPIKE_INTERFCES_H__ + +#include "cfg.h" +#include "decode_macros.h" +#include "disasm.h" +#include "mmu.h" +#include "processor.h" +#include "simif.h" +#include "spike_interfaces_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +ffi_callback ffi_addr_to_mem; +extern void *ffi_target; +std::vector reg_write_index_vec; + +class sim_t : public simif_t { +public: + sim_t() {} + ~sim_t() {} + char *addr_to_mem(reg_t addr) override { + return ffi_addr_to_mem(ffi_target, addr); + } + bool mmio_load(reg_t addr, size_t len, uint8_t *bytes) override { + throw std::logic_error("not implemented"); + } + bool mmio_store(reg_t addr, size_t len, const uint8_t *bytes) override { + throw std::logic_error("not implemented"); + } + virtual void proc_reset(unsigned id) override {} + virtual const char *get_symbol(uint64_t addr) override { + throw std::logic_error("not implemented"); + } + [[nodiscard]] const cfg_t &get_cfg() const override { + throw std::logic_error("not implemented"); + } + [[nodiscard]] const std::map & + get_harts() const override { + throw 
std::logic_error("not implemented"); + } +}; + +class Spike { +public: + Spike(const char *set, const char *lvl, size_t lane_number); + processor_t *get_proc() { return &proc; } + +private: + cfg_t cfg; + sim_t sim; + isa_parser_t isa; + processor_t proc; +}; + +struct spike_t { + Spike *s; + ffi_callback ffi_addr_to_mem; +}; +struct spike_processor_t { + processor_t *p; +}; +struct spike_state_t { + state_t *s; +}; +struct spike_mmu_t { + mmu_t *m; +}; + +#ifdef __cplusplus +} +#endif + +#endif // __SPIKE_INTERFCES_H__ diff --git a/t1rocketemu/spike_interfaces/spike_interfaces_c.h b/t1rocketemu/spike_interfaces/spike_interfaces_c.h new file mode 100644 index 000000000..6c43acaf0 --- /dev/null +++ b/t1rocketemu/spike_interfaces/spike_interfaces_c.h @@ -0,0 +1,65 @@ +#ifndef __SPIKE_INTERFCES_C_H__ +#define __SPIKE_INTERFCES_C_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef char *(*ffi_callback)(void *, uint64_t); + +typedef struct spike_t spike_t; +typedef struct spike_processor_t spike_processor_t; +typedef struct spike_state_t spike_state_t; + +void spike_register_callback(void *ffi_target, ffi_callback callback); +spike_t *spike_new(const char *set, const char *lvl, + size_t lane_number); +const char *proc_disassemble(spike_processor_t *proc); +void proc_reset(spike_processor_t *proc); +spike_processor_t *spike_get_proc(spike_t *spike); +spike_state_t *proc_get_state(spike_processor_t *proc); + +uint64_t proc_func(spike_processor_t *proc); +uint64_t proc_get_insn(spike_processor_t *proc); +uint8_t proc_get_vreg_data(spike_processor_t *proc, uint32_t vreg_idx, + uint32_t vreg_offset); +uint32_t proc_get_rs1(spike_processor_t *proc); +uint32_t proc_get_rs2(spike_processor_t *proc); +uint32_t proc_get_rd(spike_processor_t *proc); + +uint64_t proc_vu_get_vtype(spike_processor_t *proc); +uint32_t proc_vu_get_vxrm(spike_processor_t *proc); +uint32_t proc_vu_get_vnf(spike_processor_t *proc); +bool proc_vu_get_vill(spike_processor_t *proc); +bool 
proc_vu_get_vxsat(spike_processor_t *proc); +uint32_t proc_vu_get_vl(spike_processor_t *proc); +uint16_t proc_vu_get_vstart(spike_processor_t *proc); + +uint64_t state_get_pc(spike_state_t *state); +uint64_t state_handle_pc(spike_state_t *state, uint64_t new_pc); +void state_set_pc(spike_state_t *state, uint64_t pc); +uint32_t state_get_reg(spike_state_t *state, uint32_t index, bool is_fp); +uint32_t state_get_reg_write_size(spike_state_t *state); +uint32_t state_get_reg_write_index(spike_state_t *state, uint32_t index); +uint32_t state_get_mem_write_size(spike_state_t *state); +uint32_t state_get_mem_write_addr(spike_state_t *state, uint32_t index); +uint64_t state_get_mem_write_value(spike_state_t *state, uint32_t index); +uint8_t state_get_mem_write_size_by_byte(spike_state_t *state, uint32_t index); +uint32_t state_get_mem_read_size(spike_state_t *state); +uint32_t state_get_mem_read_addr(spike_state_t *state, uint32_t index); +uint8_t state_get_mem_read_size_by_byte(spike_state_t *state, uint32_t index); +void state_set_mcycle(spike_state_t *state, size_t mcycle); +void state_clear(spike_state_t *state); + +void spike_destruct(spike_t *spike); +void proc_destruct(spike_processor_t *proc); +void state_destruct(spike_state_t *state); +uint64_t state_exit(spike_state_t *state); + +#ifdef __cplusplus +} +#endif + +#endif // __SPIKE_INTERFCES_C_H__ diff --git a/t1rocketemu/spike_rs/Cargo.toml b/t1rocketemu/spike_rs/Cargo.toml new file mode 100644 index 000000000..411d44f72 --- /dev/null +++ b/t1rocketemu/spike_rs/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "spike_rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +tracing = { workspace = true } +libc = "0.2.155" +xmas-elf = "0.9.1" diff --git a/t1rocketemu/spike_rs/build.rs b/t1rocketemu/spike_rs/build.rs new file mode 100644 index 000000000..9399fdaf0 --- /dev/null +++ b/t1rocketemu/spike_rs/build.rs @@ -0,0 +1,18 @@ +use std::env; + +fn main() { + 
println!("cargo::rustc-link-search=native={}", env::var("SPIKE_LIB_DIR").expect("SPIKE_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=riscv"); + println!("cargo::rustc-link-lib=static=softfloat"); + println!("cargo::rustc-link-lib=static=disasm"); + println!("cargo::rustc-link-lib=static=fesvr"); + println!("cargo::rustc-link-lib=static=fdt"); + + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_INTERFACES_LIB_DIR").expect("SPIKE_INTERFACES_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=spike_interfaces"); + + println!("cargo::rerun-if-env-changed=SPIKE_LIB_DIR"); + println!("cargo::rerun-if-env-changed=SPIKE_INTERFACES_LIB_DIR"); + + println!("cargo::rustc-link-lib=stdc++"); +} diff --git a/t1rocketemu/spike_rs/src/lib.rs b/t1rocketemu/spike_rs/src/lib.rs new file mode 100644 index 000000000..0d64e6d24 --- /dev/null +++ b/t1rocketemu/spike_rs/src/lib.rs @@ -0,0 +1,287 @@ +pub mod spike_event; +pub mod util; + +use libc::c_char; +use std::ffi::{CStr, CString}; +use tracing::trace; + +pub fn clip(binary: u32, a: i32, b: i32) -> u32 { + assert!(a <= b, "a should be less than or equal to b"); + let nbits = b - a + 1; + let mask = if nbits >= 32 { + u32::MAX + } else { + (1 << nbits) - 1 + }; + (binary >> a) & mask +} + +pub struct Spike { + spike: *mut (), + pub mem: Vec, + pub size: usize, +} + +unsafe impl Send for Spike {} + +extern "C" fn default_addr_to_mem(target: *mut (), addr: u64) -> *mut u8 { + let spike = target as *mut Spike; + let addr = addr as usize; + unsafe { + let spike: &mut Spike = &mut *spike; + let ptr = spike.mem.as_mut_ptr().offset(addr as isize); + ptr + } +} + +type FfiCallback = extern "C" fn(*mut (), u64) -> *mut u8; + +impl Spike { + // we need to have a boxed SpikeCObject, since its pointer will be passed to C to perform FFI call + pub fn new(set: &str, lvl: &str, lane_number: usize, mem_size: usize) -> Box { + let set = CString::new(set).unwrap(); + let lvl = 
CString::new(lvl).unwrap(); + let spike = unsafe { spike_new(set.as_ptr(), lvl.as_ptr(), lane_number) }; + let mut self_: Box = Box::new(Spike { spike, mem: vec![0; mem_size], size: mem_size }); + + // TODO: support customized ffi + let ffi_target: *mut Spike = &mut *self_; + unsafe { + spike_register_callback(ffi_target as *mut (), default_addr_to_mem); + } + + self_ + } + + pub fn get_proc(&self) -> Processor { + let processor = unsafe { spike_get_proc(self.spike) }; + Processor { processor } + } + + pub fn load_bytes_to_mem( + &mut self, + addr: usize, + len: usize, + bytes: Vec, + ) -> anyhow::Result<()> { + trace!("ld: addr: 0x{:x}, len: 0x{:x}", addr, len); + assert!(addr + len <= self.size); + + let dst = &mut self.mem[addr..addr + len]; + for (i, byte) in bytes.iter().enumerate() { + dst[i] = *byte; + } + + Ok(()) + } + + pub fn mem_byte_on_addr(&self, addr: usize) -> anyhow::Result { + Ok(self.mem[addr]) + } +} + +impl Drop for Spike { + fn drop(&mut self) { + unsafe { spike_destruct(self.spike) } + } +} + +pub struct Processor { + processor: *mut (), +} + +impl Processor { + pub fn disassemble(&self) -> String { + let bytes = unsafe { proc_disassemble(self.processor) }; + let c_str = unsafe { CStr::from_ptr(bytes as *mut c_char) }; + format!("{}", c_str.to_string_lossy()) + } + + pub fn reset(&self) { + unsafe { proc_reset(self.processor) } + } + + pub fn get_state(&self) -> State { + let state = unsafe { proc_get_state(self.processor) }; + State { state } + } + + pub fn func(&self) -> u64 { + unsafe { proc_func(self.processor) } + } + + pub fn get_insn(&self) -> u32 { + unsafe { proc_get_insn(self.processor) as u32 } + } + + pub fn get_vreg_data(&self, idx: u32, offset: u32) -> u8 { + unsafe { proc_get_vreg_data(self.processor, idx, offset) } + } + + pub fn get_rs1(&self) -> u32 { + unsafe { proc_get_rs1(self.processor) } + } + + pub fn get_rs2(&self) -> u32 { + unsafe { proc_get_rs2(self.processor) } + } + + pub fn get_rd(&self) -> u32 { + unsafe { 
proc_get_rd(self.processor) } + } + + // vu + pub fn vu_get_vtype(&self) -> u32 { + unsafe { proc_vu_get_vtype(self.processor) as u32 } + } + + pub fn vu_get_vxrm(&self) -> u32 { + unsafe { proc_vu_get_vxrm(self.processor) } + } + + pub fn vu_get_vnf(&self) -> u32 { + unsafe { proc_vu_get_vnf(self.processor) } + } + + pub fn vu_get_vill(&self) -> bool { + unsafe { proc_vu_get_vill(self.processor) } + } + + pub fn vu_get_vxsat(&self) -> bool { + unsafe { proc_vu_get_vxsat(self.processor) } + } + + pub fn vu_get_vl(&self) -> u32 { + unsafe { proc_vu_get_vl(self.processor) } + } + + pub fn vu_get_vstart(&self) -> u16 { + unsafe { proc_vu_get_vstart(self.processor) } + } +} + +impl Drop for Processor { + fn drop(&mut self) { + unsafe { proc_destruct(self.processor) } + } +} + +pub struct State { + state: *mut (), +} + +impl State { + pub fn set_pc(&self, pc: u64) { + unsafe { state_set_pc(self.state, pc) } + } + + pub fn get_pc(&self) -> u64 { + unsafe { state_get_pc(self.state) } + } + + pub fn handle_pc(&self, pc: u64) -> anyhow::Result<()> { + match unsafe { state_handle_pc(self.state, pc) } { + 0 => Ok(()), + _ => Err(anyhow::anyhow!("Error handling pc")), + } + } + + pub fn get_reg(&self, idx: u32, is_fp: bool) -> u32 { + unsafe { state_get_reg(self.state, idx, is_fp) } + } + + pub fn get_reg_write_size(&self) -> u32 { + unsafe { state_get_reg_write_size(self.state) } + } + + pub fn get_reg_write_index(&self, index: u32) -> u32 { + unsafe { state_get_reg_write_index(self.state, index) } + } + + pub fn get_mem_write_size(&self) -> u32 { + unsafe { state_get_mem_write_size(self.state) } + } + + pub fn get_mem_write(&self, index: u32) -> (u32, u64, u8) { + let addr = unsafe { state_get_mem_write_addr(self.state, index) }; + let value = unsafe { state_get_mem_write_value(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_write_size_by_byte(self.state, index) }; + (addr, value, size_by_byte) + } + + pub fn get_mem_read_size(&self) -> u32 { + unsafe { 
state_get_mem_read_size(self.state) } + } + + pub fn get_mem_read(&self, index: u32) -> (u32, u8) { + let addr = unsafe { state_get_mem_read_addr(self.state, index) }; + let size_by_byte = unsafe { state_get_mem_read_size_by_byte(self.state, index) }; + (addr, size_by_byte) + } + + pub fn set_mcycle(&self, mcycle: usize) { + unsafe { state_set_mcycle(self.state, mcycle) } + } + + pub fn clear(&self) { + unsafe { state_clear(self.state) } + } + + pub fn exit(&self) -> u64 { + unsafe { state_exit(self.state) } + } +} + +impl Drop for State { + fn drop(&mut self) { + unsafe { state_destruct(self.state) } + } +} + +#[link(name = "spike_interfaces")] +extern "C" { + pub fn spike_register_callback(target: *mut (), callback: FfiCallback); + fn spike_new( + set: *const c_char, + lvl: *const c_char, + lane_number: usize, + ) -> *mut (); + fn spike_get_proc(spike: *mut ()) -> *mut (); + fn spike_destruct(spike: *mut ()); + fn proc_disassemble(proc: *mut ()) -> *mut c_char; + fn proc_reset(proc: *mut ()); + fn proc_get_state(proc: *mut ()) -> *mut (); + fn proc_func(proc: *mut ()) -> u64; + fn proc_get_insn(proc: *mut ()) -> u64; + fn proc_get_vreg_data(proc: *mut (), vreg_idx: u32, vreg_offset: u32) -> u8; + fn proc_get_rs1(proc: *mut ()) -> u32; + fn proc_get_rs2(proc: *mut ()) -> u32; + fn proc_get_rd(proc: *mut ()) -> u32; + + fn proc_vu_get_vtype(proc: *mut ()) -> u64; + fn proc_vu_get_vxrm(proc: *mut ()) -> u32; + fn proc_vu_get_vnf(proc: *mut ()) -> u32; + fn proc_vu_get_vill(proc: *mut ()) -> bool; + fn proc_vu_get_vxsat(proc: *mut ()) -> bool; + fn proc_vu_get_vl(proc: *mut ()) -> u32; + fn proc_vu_get_vstart(proc: *mut ()) -> u16; + + fn proc_destruct(proc: *mut ()); + fn state_set_pc(state: *mut (), pc: u64); + fn state_get_pc(state: *mut ()) -> u64; + fn state_get_reg(state: *mut (), index: u32, is_fp: bool) -> u32; + fn state_get_reg_write_size(state: *mut ()) -> u32; + fn state_get_reg_write_index(state: *mut (), index: u32) -> u32; + fn 
state_get_mem_write_size(state: *mut ()) -> u32; + fn state_get_mem_write_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_write_value(state: *mut (), index: u32) -> u64; + fn state_get_mem_write_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_get_mem_read_size(state: *mut ()) -> u32; + fn state_get_mem_read_addr(state: *mut (), index: u32) -> u32; + fn state_get_mem_read_size_by_byte(state: *mut (), index: u32) -> u8; + fn state_handle_pc(state: *mut (), pc: u64) -> u64; + fn state_set_mcycle(state: *mut (), mcycle: usize); + fn state_clear(state: *mut ()); + fn state_destruct(state: *mut ()); + fn state_exit(state: *mut ()) -> u64; +} diff --git a/t1rocketemu/spike_rs/src/spike_event.rs b/t1rocketemu/spike_rs/src/spike_event.rs new file mode 100644 index 000000000..611f7156b --- /dev/null +++ b/t1rocketemu/spike_rs/src/spike_event.rs @@ -0,0 +1,523 @@ +use std::collections::HashMap; +use tracing::trace; +use Default; + +use crate::clip; +use crate::Spike; + +#[derive(Debug, Clone)] +pub struct SingleMemWrite { + pub val: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Debug, Clone)] +pub struct SingleMemRead { + pub val: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Debug, Clone)] +pub struct MemWriteRecord { + pub writes: Vec, + pub num_completed_writes: usize, +} + +#[derive(Debug, Clone)] +pub struct MemReadRecord { + pub reads: Vec, + pub num_completed_reads: usize, +} + +#[derive(Debug, Clone)] +pub struct SingleVrfWrite { + pub byte: u8, + pub executed: bool, // set to true when rtl execute this mem access +} + +#[derive(Default, Debug, Clone)] +pub struct VdWriteRecord { + vd_bytes: Vec, +} + +#[derive(Default, Debug, Clone)] +pub struct MemAccessRecord { + pub all_writes: HashMap, + pub all_reads: HashMap, +} + +#[derive(Default, Debug, Clone)] +pub struct VrfAccessRecord { + pub all_writes: HashMap, + pub unretired_writes: Option, + pub 
retired_writes: u32, +} + +pub const LSU_IDX_DEFAULT: u8 = 0xff; +pub const ISSUE_IDX_DEFAULT: u8 = 0xff; + +#[derive(Default, Debug, Clone)] +pub struct SpikeEvent { + pub do_log_vrf: bool, + + // index + pub lsu_idx: u8, + pub issue_idx: u8, + + // instruction + pub disasm: String, + pub pc: u64, + pub inst_bits: u32, + + // scalar to vector interface(used for driver) + pub rs1: u32, + pub rs2: u32, + pub rs1_bits: u32, + pub rs2_bits: u32, + pub rd_idx: u32, + + // vtype + pub vtype: u32, + pub vxrm: u32, + pub vnf: u32, + + // other CSR + pub vill: bool, + pub vxsat: bool, + pub vl: u32, + pub vstart: u16, + + // rd + pub rd_bits: u32, + + // mutable states + pub is_rd_written: bool, + pub vd_write_record: VdWriteRecord, + pub mem_access_record: MemAccessRecord, + pub vrf_access_record: VrfAccessRecord, +} + +impl SpikeEvent { + pub fn new(spike: &Spike, do_log_vrf: bool) -> Self { + let proc = spike.get_proc(); + let state = proc.get_state(); + let inst_bits = proc.get_insn(); + + let opcode = clip(inst_bits, 0, 6); + let width = clip(inst_bits, 12, 14); + + let is_rs_fp = opcode == 0b1010111 && width == 0b101/* OPFVF */; + // early return vsetvl scalar instruction + + // rs1, rs2 + let (rs1, rs2) = (proc.get_rs1(), proc.get_rs2()); + + SpikeEvent { + do_log_vrf, + + lsu_idx: LSU_IDX_DEFAULT, + issue_idx: ISSUE_IDX_DEFAULT, + + disasm: spike.get_proc().disassemble(), + pc: proc.get_state().get_pc(), + inst_bits, + + rs1, + rs2, + rs1_bits: state.get_reg(rs1, is_rs_fp), + rs2_bits: state.get_reg(rs2, is_rs_fp), + rd_idx: proc.get_rd(), + + vtype: proc.vu_get_vtype(), + vxrm: proc.vu_get_vxrm(), + vnf: proc.vu_get_vnf(), + + vill: proc.vu_get_vill(), + vxsat: proc.vu_get_vxsat(), + vl: proc.vu_get_vl(), + vstart: proc.vu_get_vstart(), + + rd_bits: Default::default(), + + is_rd_written: false, + vd_write_record: Default::default(), + mem_access_record: Default::default(), + vrf_access_record: Default::default(), + } + } + + pub fn opcode(&self) -> u32 { + 
clip(self.inst_bits, 0, 6) + } + + pub fn width(&self) -> u32 { + clip(self.inst_bits, 12, 14) + } + + pub fn rs1(&self) -> u32 { + clip(self.inst_bits, 15, 19) + } + + pub fn csr(&self) -> u32 { + clip(self.inst_bits, 20, 31) + } + + pub fn funct6(&self) -> u32 { + clip(self.inst_bits, 26, 31) + } + + pub fn mop(&self) -> u32 { + clip(self.inst_bits, 26, 27) + } + + pub fn lumop(&self) -> u32 { + clip(self.inst_bits, 20, 24) + } + + pub fn vm(&self) -> bool { + clip(self.inst_bits, 25, 25) != 0 + } + + // check whether the instruction is a vector load + pub fn is_vload(&self) -> bool { + self.opcode() == 0b0000111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + // check whether the instruction is a vector store + pub fn is_vstore(&self) -> bool { + self.opcode() == 0b0100111 && self.width().wrapping_sub(1) & 0b100 != 0 + } + + pub fn is_v(&self) -> bool { + (self.opcode() == 0b1010111 || self.is_vload() || self.is_vstore()) && !self.is_vsetvl() + } + + pub fn is_vsetvl(&self) -> bool { + self.opcode() == 0b1010111 && self.width() == 0b111 + } + + pub fn is_scalar(&self) -> bool { + !self.is_v() + } + + // check whether the instruction is a scalar load + pub fn is_load(&self) -> bool { + self.opcode() == 0b0000011 || self.is_cl() + } + + // check whether the instruction is a scalar store + pub fn is_store(&self) -> bool { + self.opcode() == 0b0100011 || self.is_cw() + } + + pub fn is_whole(&self) -> bool { + self.mop() == 0 && self.lumop() == 8 + } + + pub fn is_widening(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_mask_vd(&self) -> bool { + self.opcode() == 0b1010111 && (self.funct6() >> 4) == 0b11 + } + + pub fn is_exit(&self) -> bool { + let is_csr_type = self.opcode() == 0b1110011 && ((self.width() & 0b011) != 0); + let is_csr_write = is_csr_type && (((self.width() & 0b100) | self.rs1()) != 0); + + is_csr_write && self.csr() == 0x7cc + } + + pub fn is_vfence(&self) -> bool { + self.is_exit() // only exit 
instruction is treated as fence now + } + + pub fn is_rd_fp(&self) -> bool { + (self.opcode() == 0b1010111) + && (self.rs1 == 0) + && (self.funct6() == 0b010000) + && self.vm() + && (self.width() == 0b001) + } + + pub fn c_op(&self) -> u32 { + clip(self.inst_bits, 0, 1) + } + + pub fn c_func3(&self) -> u32 { + clip(self.inst_bits, 13, 15) + } + + pub fn is_cl(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 == 0 ) || /* c.lw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 == 0 ) /* c.lwsp */ + } + + pub fn is_cw(&self) -> bool { + ( self.c_op() == 0b00 && self.c_func3() & 0b100 != 0 ) || /* c.sw */ + ( self.c_op() == 0b10 && self.c_func3() & 0b100 != 0 ) /* c.swsp */ + } + + pub fn vlmul(&self) -> u32 { + clip(self.vtype, 0, 2) + } + + pub fn vma(&self) -> bool { + clip(self.vtype, 7, 7) != 0 + } + + pub fn vta(&self) -> bool { + clip(self.vtype, 6, 6) != 0 + } + + pub fn vsew(&self) -> u32 { + clip(self.vtype, 3, 5) + } + + pub fn vcsr(&self) -> u32 { + self.vxsat as u32 | self.vxrm << 1 + } + + pub fn describe_insn(&self) -> String { + format!( + "pc={:#x}, disasm='{}', bits={:#x}", + self.pc, self.disasm, self.inst_bits + ) + } + + pub fn get_vrf_write_range(&self, vlen_in_bytes: u32) -> anyhow::Result<(u32, u32)> { + if self.is_vstore() { + return Ok((0, 0)); + } + + if self.is_vload() { + let vd_bytes_start = self.rd_idx * vlen_in_bytes; + if self.is_whole() { + return Ok((vd_bytes_start, vlen_in_bytes * (1 + self.vnf))); + } + let len = if self.vlmul() & 0b100 != 0 { + vlen_in_bytes * (1 + self.vnf) + } else { + (vlen_in_bytes * (1 + self.vnf)) << self.vlmul() + }; + return Ok((vd_bytes_start, len)); + } + + let vd_bytes_start = self.rd_idx * vlen_in_bytes; + + if self.is_mask_vd() { + return Ok((vd_bytes_start, vlen_in_bytes)); + } + + let len = if self.vlmul() & 0b100 != 0 { + vlen_in_bytes >> (8 - self.vlmul()) + } else { + vlen_in_bytes << self.vlmul() + }; + + Ok(( + vd_bytes_start, + if self.is_widening() { len * 2 } else { len }, 
+ )) + } + + pub fn pre_log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + if self.do_log_vrf { + self.rd_bits = spike.get_proc().get_rd(); + + // record the vrf writes before executing the insn + let vlen_in_bytes = vlen; + + let proc = spike.get_proc(); + let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); + self.vd_write_record.vd_bytes.resize(len as usize, 0u8); + for i in 0..len { + let offset = start + i; + let vreg_index = offset / vlen_in_bytes; + let vreg_offset = offset % vlen_in_bytes; + let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); + self.vd_write_record.vd_bytes[i as usize] = cur_byte; + } + } + + Ok(()) + } + + pub fn log_arch_changes(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + if self.do_log_vrf { + self.log_vrf_write(spike, vlen).unwrap(); + self.log_reg_write(spike).unwrap(); + } + self.log_mem_write(spike).unwrap(); + self.log_mem_read(spike).unwrap(); + + Ok(()) + } + + fn log_vrf_write(&mut self, spike: &Spike, vlen: u32) -> anyhow::Result<()> { + let proc = spike.get_proc(); + // record vrf writes + // note that we do not need log_reg_write to find records, we just decode the + // insn and compare bytes + let vlen_in_bytes = vlen / 8; + let (start, len) = self.get_vrf_write_range(vlen_in_bytes).unwrap(); + trace!("vrf write range: start: {start}, len: {len}"); + for i in 0..len { + let offset = start + i; + let origin_byte = self.vd_write_record.vd_bytes[i as usize]; + let vreg_index = offset / vlen_in_bytes; + let vreg_offset = offset % vlen_in_bytes; + let cur_byte = proc.get_vreg_data(vreg_index, vreg_offset); + if origin_byte != cur_byte { + self + .vrf_access_record + .all_writes + .entry(offset as usize) + .or_insert(SingleVrfWrite { byte: cur_byte, executed: false }); + trace!( + "SpikeVRFChange: vrf={:?}, change_from={origin_byte}, change_to={cur_byte}, vrf_idx={offset}", + vec![offset / vlen_in_bytes, offset % vlen_in_bytes], + ); + } else { + trace!( + 
"SpikeVRFChange: vrf={:?}, change_from={origin_byte}, not changed, vrf_idx={offset}", + vec![offset / vlen_in_bytes, offset % vlen_in_bytes], + ); + } + } + Ok(()) + } + + fn log_reg_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + // in spike, log_reg_write is arrange: + // xx0000 <- x + // xx0001 <- f + // xx0010 <- vreg + // xx0011 <- vec + // xx0100 <- csr + let reg_write_size = state.get_reg_write_size(); + // TODO: refactor it. + (0..reg_write_size).for_each(|idx| match state.get_reg_write_index(idx) & 0xf { + 0b0000 => { + // scalar rf + let data = state.get_reg(self.rd_idx, false); + self.is_rd_written = true; + if data != self.rd_bits { + trace!( + "ScalarRFChange: idx={}, change_from={}, change_to={data}", + self.rd_idx, + self.rd_bits + ); + self.rd_bits = data; + } + } + 0b0001 => { + let data = state.get_reg(self.rd_idx, true); + self.is_rd_written = true; + if data != self.rd_bits { + trace!( + "FloatRFChange: idx={}, change_from={}, change_to={data}", + self.rd_idx, + self.rd_bits + ); + self.rd_bits = data; + } + } + _ => trace!( + "UnknownRegChange, idx={}, spike detect unknown reg change", + state.get_reg_write_index(idx) + ), + }); + + Ok(()) + } + + pub fn log_mem_write(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = spike.get_proc(); + let state = proc.get_state(); + + let mem_write_size = state.get_mem_write_size(); + (0..mem_write_size).for_each(|i| { + let (addr, value, size) = state.get_mem_write(i); + (0..size).for_each(|offset| { + self + .mem_access_record + .all_writes + .entry(addr + offset as u32) + .or_insert(MemWriteRecord { writes: vec![], num_completed_writes: 0 }) + .writes + .push(SingleMemWrite { + val: (value >> (offset * 8)) as u8, + executed: false, + }); + }); + trace!("SpikeMemWrite: addr={addr:x}, value={value:x}, size={size}"); + }); + + Ok(()) + } + + fn log_mem_read(&mut self, spike: &Spike) -> anyhow::Result<()> { + let proc = 
spike.get_proc(); + let state = proc.get_state(); + + let mem_read_size = state.get_mem_read_size(); + (0..mem_read_size).for_each(|i| { + let (addr, size) = state.get_mem_read(i); + let mut value = 0; + (0..size).for_each(|offset| { + let byte = spike.mem_byte_on_addr(addr as usize + offset as usize).unwrap(); + value |= (byte as u64) << (offset * 8); + // record the read + self + .mem_access_record + .all_reads + .entry(addr + offset as u32) + .or_insert(MemReadRecord { reads: vec![], num_completed_reads: 0 }) + .reads + .push(SingleMemRead { val: byte, executed: false }); + }); + trace!("SpikeMemRead: addr={addr:08x}, value={value:08x}, size={size}"); + }); + + Ok(()) + } + + pub fn check_rd(&self, data: u32) -> anyhow::Result<()> { + // TODO: rtl should indicate whether resp_bits_data is valid + if self.is_rd_written { + assert_eq!( + data, self.rd_bits, + "expect to write rd[{}] = {}, actual {}", + self.rd_idx, self.rd_bits, data + ); + } + + Ok(()) + } + + pub fn check_is_ready_for_commit(&self, cycle: u64) -> anyhow::Result<()> { + for (addr, record) in &self.mem_access_record.all_writes { + assert_eq!( + record.num_completed_writes, + record.writes.len(), + "[{cycle}] expect to write mem {addr:#x}, not executed when commit, issue_idx={} ({})", + self.issue_idx, + self.describe_insn(), + ); + } + for (idx, record) in &self.vrf_access_record.all_writes { + assert!( + record.executed, + "[{cycle}] expect to write vrf {idx}, not executed when commit, issue_idx={} ({})", + self.issue_idx, + self.describe_insn() + ); + } + + Ok(()) + } +} diff --git a/t1rocketemu/spike_rs/src/util.rs b/t1rocketemu/spike_rs/src/util.rs new file mode 100644 index 000000000..6ded0eec5 --- /dev/null +++ b/t1rocketemu/spike_rs/src/util.rs @@ -0,0 +1,65 @@ +use crate::Spike; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use xmas_elf::program::{ProgramHeader, Type}; +use xmas_elf::{header, ElfFile}; + +pub fn load_elf(spike: &mut Spike, fname: &Path) -> anyhow::Result { 
+ let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + spike.load_bytes_to_mem(addr, size, slice.to_vec()).unwrap(); + } + } + } + + Ok(header.pt2.entry_point()) +} + +// todo: unify load_elf and load_elf_to_buffer +pub fn load_elf_to_buffer(mem: &mut [u8], fname: &Path) -> anyhow::Result { + let mut file = File::open(fname).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + let elf_file = ElfFile::new(&buffer).unwrap(); + + let header = elf_file.header; + assert_eq!(header.pt2.machine().as_machine(), header::Machine::RISC_V); + assert_eq!(header.pt1.class(), header::Class::ThirtyTwo); + + for ph in elf_file.program_iter() { + if let ProgramHeader::Ph32(ph) = ph { + if ph.get_type() == Ok(Type::Load) { + let offset = ph.offset as usize; + let size = ph.file_size as usize; + let addr = ph.virtual_addr as usize; + + let slice = &buffer[offset..offset + size]; + + let dst: &mut _ = &mut mem[addr..addr + size]; + for (i, byte) in slice.iter().enumerate() { + dst[i] = *byte; + } + } + } + } + + Ok(header.pt2.entry_point()) +} diff --git a/t1rocketemu/test_common/Cargo.toml b/t1rocketemu/test_common/Cargo.toml new file mode 100644 index 000000000..d5b3f32aa --- /dev/null +++ b/t1rocketemu/test_common/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "common" +version.workspace = true +edition = "2021" + +[dependencies] +spike_rs = { path = "../spike_rs" } +clap = { workspace = true } 
+tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } diff --git a/t1rocketemu/test_common/src/lib.rs b/t1rocketemu/test_common/src/lib.rs new file mode 100644 index 000000000..e9690d3a2 --- /dev/null +++ b/t1rocketemu/test_common/src/lib.rs @@ -0,0 +1,63 @@ +use anyhow::Result; +use clap::Parser; +use spike_rs::Spike; +use std::path::PathBuf; +use tracing::Level; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; + +pub mod rtl_config; +pub mod spike_runner; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +pub struct CommonArgs { + /// Path to the ELF file + #[arg(long)] + pub elf_file: PathBuf, + + /// Path to the log file + #[arg(long)] + pub log_file: Option, + + /// Log level: trace, debug, info, warn, error + #[arg(long, default_value = "info")] + pub log_level: String, + + /// vlen config + #[arg(long, default_value = env!("DESIGN_VLEN"))] + pub vlen: u32, + + /// dlen config + #[arg(long, default_value = env!("DESIGN_DLEN"))] + pub dlen: u32, + + /// ISA config + #[arg(long, default_value = env!("SPIKE_ISA_STRING"))] + pub set: String, +} + +pub static MEM_SIZE: usize = 1usize << 32; + +impl CommonArgs { + pub fn to_spike_c_handler(&self) -> Box { + let lvl = "M"; + + Spike::new(&self.set, lvl, (self.dlen / 32) as usize, MEM_SIZE) + } + + pub fn setup_logger(&self) -> Result<()> { + // setup log + let log_level: Level = self.log_level.parse()?; + let global_logger = FmtSubscriber::builder() + .with_env_filter(EnvFilter::from_default_env()) + .with_max_level(log_level) + .without_time() + .with_target(false) + .with_ansi(true) + .compact() + .finish(); + tracing::subscriber::set_global_default(global_logger) + .expect("internal error: fail to setup log subscriber"); + Ok(()) + } +} diff --git a/t1rocketemu/test_common/src/rtl_config.rs b/t1rocketemu/test_common/src/rtl_config.rs new file mode 100644 index 000000000..0daf72624 --- /dev/null +++ 
b/t1rocketemu/test_common/src/rtl_config.rs @@ -0,0 +1,20 @@ +pub struct RTLConfig { + pub vlen: u32, + pub dlen: u32, +} + +// TODO: read from json + +impl RTLConfig { + pub fn xlen(&self) -> u32 { + 32 // TODO: configurable + } + + pub fn vlen_in_bytes(&self) -> u32 { + self.vlen / 8 + } + + pub fn lane_num(&self) -> u32 { + self.dlen / self.xlen() + } +} diff --git a/t1rocketemu/test_common/src/spike_runner.rs b/t1rocketemu/test_common/src/spike_runner.rs new file mode 100644 index 000000000..b9339be7a --- /dev/null +++ b/t1rocketemu/test_common/src/spike_runner.rs @@ -0,0 +1,141 @@ +use std::collections::VecDeque; +use std::path::Path; +use tracing::debug; + +use spike_rs::spike_event::SpikeEvent; +use spike_rs::util::load_elf; +use spike_rs::Spike; + +use crate::CommonArgs; + +pub struct SpikeRunner { + spike: Box, + + /// commit queue + /// in the spike thread, spike should detech if this queue is full, if not + /// full, execute until a vector instruction, record the behavior of this + /// instruction, and send to commit queue. + /// Note: + /// - The event issued earliest is at the back of the queue + /// - The queue may contain at most one unissued event. 
If so, the unissued event must be at the + /// front of the queue, and it must be a fence + pub commit_queue: VecDeque, + + /// config for v extension + pub vlen: u32, + pub dlen: u32, + + /// implement the get_t() for mcycle csr update + pub cycle: u64, + + /// for mcycle csr update + pub spike_cycle: u64, + + pub do_log_vrf: bool, +} + +impl SpikeRunner { + pub fn new(args: &CommonArgs, do_log_vrf: bool) -> Self { + // load the elf file + // initialize spike + let mut spike = args.to_spike_c_handler(); + + let entry_addr = load_elf(&mut spike, Path::new(&args.elf_file)).unwrap(); + + // initialize processor + let proc = spike.get_proc(); + let state = proc.get_state(); + proc.reset(); + state.set_pc(entry_addr); + + SpikeRunner { + spike, + commit_queue: VecDeque::new(), + vlen: args.vlen, + dlen: args.dlen, + cycle: 0, + spike_cycle: 0, + do_log_vrf, + } + } + + pub fn load_elf(&mut self, fname: &Path) -> anyhow::Result { + load_elf(&mut *self.spike, fname) + } + + // just execute one instruction for non-difftest + pub fn exec(&self) -> anyhow::Result<()> { + let spike = &self.spike; + let proc = spike.get_proc(); + let state = proc.get_state(); + + let new_pc = proc.func(); + + state.handle_pc(new_pc).unwrap(); + + let ret = state.exit(); + + if ret == 0 { + return Err(anyhow::anyhow!("simulation finished!")); + } + + Ok(()) + } + + // execute the spike processor for one instruction and record + // the spike event for difftest + pub fn spike_step(&mut self) -> SpikeEvent { + let spike = &self.spike; + let proc = self.spike.get_proc(); + let state = proc.get_state(); + + let mcycle = (self.cycle + self.spike_cycle) as usize; + state.set_mcycle(0); + + let mut event = SpikeEvent::new(spike, self.do_log_vrf); + state.clear(); + + let new_pc = if event.is_v() || event.is_exit() { + // inst is v / quit + debug!( + "SpikeStep: spike run vector insn ({}), mcycle={mcycle}", + event.describe_insn(), + ); + event.pre_log_arch_changes(spike, self.vlen).unwrap(); + let 
new_pc_ = proc.func(); + event.log_arch_changes(spike, self.vlen).unwrap(); + new_pc_ + } else { + // inst is scalar + debug!( + "SpikeStep: spike run scalar insn ({}), mcycle={mcycle}", + event.describe_insn(), + ); + let new_pc_ = proc.func(); + event.log_mem_write(spike).unwrap(); + new_pc_ + }; + + state.handle_pc(new_pc).unwrap(); + + self.spike_cycle += 1; + + event + } + + pub fn find_v_se_to_issue(&mut self) -> SpikeEvent { + if !self.commit_queue.is_empty() && self.commit_queue.front().unwrap().is_vfence() { + // if the front (latest) se is a vfence, return the vfence + self.commit_queue.front().unwrap().clone() + } else { + // else, loop until find a se, and push the se to the front + loop { + let se = self.spike_step(); + if se.is_v() { + self.commit_queue.push_front(se.clone()); + break se.clone(); + } + } + } + } +} diff --git a/t1rocketemu/vcs.nix b/t1rocketemu/vcs.nix new file mode 100644 index 000000000..e69de29bb diff --git a/t1rocketemu/verilator.nix b/t1rocketemu/verilator.nix new file mode 100644 index 000000000..9fb575a35 --- /dev/null +++ b/t1rocketemu/verilator.nix @@ -0,0 +1,90 @@ +{ lib +, enableDebugging +, libspike +, libspike_interfaces +, callPackage +, elaborateConfig + +, rustPlatform + +, rust-analyzer +, rust-bindgen + +, verilator +, verilated +, cmake +, clang-tools +}: + +let + self = rustPlatform.buildRustPackage { + name = "verilator-emu" + (lib.optionalString verilated.enable-trace "-trace"); + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./spike_rs + ./offline + ./online_dpi + ./online_drive + ./online_vcs + ./test_common + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + libspike_interfaces + verilated + ]; + + nativeBuildInputs = [ + verilator + cmake + ]; + + buildFeatures = lib.optionals verilated.enable-trace [ "trace" ]; + + env = { + VERILATED_INC_DIR = "${verilated}/include"; + VERILATED_LIB_DIR = "${verilated}/lib"; + SPIKE_LIB_DIR = "${libspike}/lib"; + 
SPIKE_INTERFACES_LIB_DIR = "${libspike_interfaces}/lib"; + SPIKE_ISA_STRING = + "rv32gc" + + (builtins.concatStringsSep "_" elaborateConfig.parameter.extensions) + + "_Zvl${toString elaborateConfig.parameter.vLen}b"; + DESIGN_VLEN = elaborateConfig.parameter.vLen; + DESIGN_DLEN = elaborateConfig.parameter.dLen; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + dontUseCmakeConfigure = true; + + passthru = { + devShell = self.overrideAttrs (old: { + nativeBuildInputs = old.nativeBuildInputs ++ [ + rust-analyzer + rust-bindgen + clang-tools + ]; + }); + inherit libspike_interfaces; + + # enable debug info for difftest itself and libspike + withDebug = self.overrideAttrs (old: { + cargoBuildType = "debug"; + doCheck = false; + env = old.env // { + SPIKE_LIB_DIR = "${enableDebugging libspike}/lib"; + }; + dontStrip = true; + }); + }; + }; +in +self From 1e7eb5eecec5d9fa115aec51f82ca427624d7dc7 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Sun, 4 Aug 2024 11:00:03 +0800 Subject: [PATCH 127/140] [elaborator] fix formatting --- elaborator/src/Main.scala | 66 ++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/elaborator/src/Main.scala b/elaborator/src/Main.scala index 1e38d4f13..8cd5bc48f 100644 --- a/elaborator/src/Main.scala +++ b/elaborator/src/Main.scala @@ -29,11 +29,15 @@ object Main { ).foldLeft( Seq( chisel3.stage.ChiselGeneratorAnnotation(gen), - chisel3.panamaconverter.stage.FirtoolOptionsAnnotation(FirtoolOptions(Set( - BuildMode(BuildModeDebug), - PreserveValues(PreserveValuesModeNamed), - DisableUnknownAnnotations(true) - ))), + chisel3.panamaconverter.stage.FirtoolOptionsAnnotation( + FirtoolOptions( + Set( + BuildMode(BuildModeDebug), + PreserveValues(PreserveValuesModeNamed), + DisableUnknownAnnotations(true) + ) + ) + ) ): firrtl.AnnotationSeq ) { case (annos, stage) => stage.transform(annos) } .flatMap { @@ -43,9 +47,9 @@ object Main { case 
chisel3.panamaconverter.stage.PanamaCIRCTConverterAnnotation(converter) => if (binderMlirbcOut.nonEmpty) panamaCIRCTConverter = converter None - case _: chisel3.panamaconverter.stage.FirtoolOptionsAnnotation => None - case _: chisel3.stage.DesignAnnotation[_] => None - case _: chisel3.stage.ChiselCircuitAnnotation => None + case _: chisel3.panamaconverter.stage.FirtoolOptionsAnnotation => None + case _: chisel3.stage.DesignAnnotation[_] => None + case _: chisel3.stage.ChiselCircuitAnnotation => None case a => Some(a) } @@ -64,42 +68,48 @@ object Main { case class IPConfig( @arg(name = "ip-config", short = 'c') ipConfig: os.Path) { def generator = upickle.default - .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.t1.rtl.T1, org.chipsalliance.t1.rtl.T1Parameter]](ujson.read(os.read(ipConfig))) + .read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.t1.rtl.T1, + org.chipsalliance.t1.rtl.T1Parameter + ]](ujson.read(os.read(ipConfig))) def parameter: T1Parameter = generator.parameter } case class RocketConfig( @arg(name = "rocket-config", short = 'c') rocketConfig: os.Path) { def generator = upickle.default - .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.rocketv.RocketTile, org.chipsalliance.rocketv.RocketTileParameter]](ujson.read(os.read(rocketConfig))) + .read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.rocketv.RocketTile, + org.chipsalliance.rocketv.RocketTileParameter + ]](ujson.read(os.read(rocketConfig))) def parameter: RocketTileParameter = generator.parameter } case class T1RocketConfig( - @arg(name = "t1rocket-config", short = 'c') rocketConfig: os.Path) { + @arg(name = "t1rocket-config", short = 'c') rocketConfig: os.Path) { def generator = upickle.default - .read[chisel3.experimental.SerializableModuleGenerator[org.chipsalliance.t1.tile.T1RocketTile, org.chipsalliance.t1.tile.T1RocketTileParameter]](ujson.read(os.read(rocketConfig))) + 
.read[chisel3.experimental.SerializableModuleGenerator[ + org.chipsalliance.t1.tile.T1RocketTile, + org.chipsalliance.t1.tile.T1RocketTileParameter + ]](ujson.read(os.read(rocketConfig))) def parameter: T1RocketTileParameter = generator.parameter } - implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] - implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] + implicit def ipConfig: ParserForClass[IPConfig] = ParserForClass[IPConfig] + implicit def rocketConfig: ParserForClass[RocketConfig] = ParserForClass[RocketConfig] implicit def t1RocketConfig: ParserForClass[T1RocketConfig] = ParserForClass[T1RocketConfig] - // format: off - @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => - ipConfig.generator.module() - ) - @main def ipemu(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = elaborateConfig.elaborate(() => - new org.chipsalliance.t1.ipemu.TestBench(ipConfig.generator) - ) - @main def rocketemu(elaborateConfig: ElaborateConfig, rocketConfig: RocketConfig): Unit = elaborateConfig.elaborate(() => - new org.chipsalliance.t1.rocketv.TestBench(rocketConfig.generator) - ) - @main def t1rocketemu(elaborateConfig: ElaborateConfig, t1rocketConfig: T1RocketConfig): Unit = elaborateConfig.elaborate(() => - new org.chipsalliance.t1.t1rocketemu.TestBench(t1rocketConfig.generator) - ) - // format: on + @main def ip(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = + elaborateConfig.elaborate(() => ipConfig.generator.module()) + + @main def ipemu(elaborateConfig: ElaborateConfig, ipConfig: IPConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.ipemu.TestBench(ipConfig.generator)) + + @main def rocketemu(elaborateConfig: ElaborateConfig, rocketConfig: RocketConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.rocketv.TestBench(rocketConfig.generator)) + + @main def t1rocketemu(elaborateConfig: 
ElaborateConfig, t1rocketConfig: T1RocketConfig): Unit = + elaborateConfig.elaborate(() => new org.chipsalliance.t1.t1rocketemu.TestBench(t1rocketConfig.generator)) def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) } From 2fd9dd2e33c447772d5361cc463ede87c4d35409 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 5 Aug 2024 00:04:14 +0800 Subject: [PATCH 128/140] [nix] add derivation for t1rocketemu --- nix/t1/default.nix | 2 + nix/t1/t1.nix | 4 +- t1rocketemu/configs/default.json | 17 +++++++ t1rocketemu/default.nix | 67 +++++++++++++++++++++++++ t1rocketemu/nix/mlirbc.nix | 23 +++++++++ t1rocketemu/nix/rtl.nix | 26 ++++++++++ t1rocketemu/nix/verilated-c-lib.nix | 78 +++++++++++++++++++++++++++++ 7 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 t1rocketemu/configs/default.json create mode 100644 t1rocketemu/default.nix create mode 100644 t1rocketemu/nix/mlirbc.nix create mode 100644 t1rocketemu/nix/rtl.nix create mode 100644 t1rocketemu/nix/verilated-c-lib.nix diff --git a/nix/t1/default.nix b/nix/t1/default.nix index 2d5829d42..33bf22df7 100644 --- a/nix/t1/default.nix +++ b/nix/t1/default.nix @@ -37,6 +37,8 @@ lib.makeScope newScope rocketv = self.callPackage ../../rocketemu { }; + t1rocketemu = self.callPackage ../../t1rocketemu { }; + omreader-unwrapped = self.callPackage ./omreader.nix { }; submodules = self.callPackage ./submodules.nix { }; diff --git a/nix/t1/t1.nix b/nix/t1/t1.nix index b7eda72bd..c4efaada7 100644 --- a/nix/t1/t1.nix +++ b/nix/t1/t1.nix @@ -30,8 +30,8 @@ let ./../../elaborator ./../../configgen/src ./../../rocketv - ./../../t1rocket - ./../../t1rocketemu + ./../../t1rocket/src + ./../../t1rocketemu/src ./../../rocketemu/src ]; }; diff --git a/t1rocketemu/configs/default.json b/t1rocketemu/configs/default.json new file mode 100644 index 000000000..14aa529a3 --- /dev/null +++ b/t1rocketemu/configs/default.json @@ -0,0 +1,17 @@ +{ + "instructionSets": ["rv32_i", "rv_a", "rv_v", "Zve32x", 
"zvl1024b", "rv_c"], + "cacheBlockBytes": 32, + "nPMPs": 8, + "cacheable": "b1???????????????????????????????", + "sideEffects": "b000?????????????????????????????", + "dcacheNSets": 64, + "dcacheNWays": 4, + "dcacheRowBits": 32, + "iCacheNSets": 32, + "iCacheNWays": 4, + "iCachePrefetch": false, + "dLen": 256, + "vrfBankSize": 2, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w" +} + diff --git a/t1rocketemu/default.nix b/t1rocketemu/default.nix new file mode 100644 index 000000000..ae841da2c --- /dev/null +++ b/t1rocketemu/default.nix @@ -0,0 +1,67 @@ +{ lib +, newScope +, rustPlatform +, zlib +, libspike +, libspike_interfaces +, cmake +, verilator +}: +lib.makeScope newScope (scope: rec { + mlirbc = scope.callPackage ./nix/mlirbc.nix { }; + rtl = scope.callPackage ./nix/rtl.nix { }; + verilated-c-lib = scope.callPackage ./nix/verilated-c-lib.nix { }; + + emu = rustPlatform.buildRustPackage { + name = "rocketemu"; + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./test_common + ./spike_rs + ./offline + ./online_dpi + ./online_drive + ./online_vcs + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + zlib + libspike_interfaces + verilated-c-lib + ]; + + nativeBuildInputs = [ + verilator + cmake + ]; + + # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? 
+ env = + let + toLib = drv: "${drv}/lib"; + in + { + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib libspike_interfaces; + VERILATED_INC_DIR = "${verilated-c-lib}/include"; + VERILATED_LIB_DIR = "${verilated-c-lib}/lib"; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + ln -s $out/bin/offline $driver/bin/offline + ''; + }; +}) diff --git a/t1rocketemu/nix/mlirbc.nix b/t1rocketemu/nix/mlirbc.nix new file mode 100644 index 000000000..73a1b327b --- /dev/null +++ b/t1rocketemu/nix/mlirbc.nix @@ -0,0 +1,23 @@ +{ stdenvNoCC + +, espresso +, circt + +, elaborator +, config +}: +stdenvNoCC.mkDerivation { + name = "t1-rocketv-elaborated.mlirbc"; + + nativeBuildInputs = [ elaborator espresso circt ]; + + buildCommand = '' + mkdir elaborate + elaborator rocketemu --target-dir elaborate --t1rocket-config ${config} + firtool elaborate/*.fir \ + --annotation-file elaborate/*.anno.json \ + --emit-bytecode \ + --parse-only \ + -o $out + ''; +} diff --git a/t1rocketemu/nix/rtl.nix b/t1rocketemu/nix/rtl.nix new file mode 100644 index 000000000..93b41a309 --- /dev/null +++ b/t1rocketemu/nix/rtl.nix @@ -0,0 +1,26 @@ +{ stdenvNoCC +, lib + +, circt +, mlirbc +}: + +let + mfcArgs = lib.escapeShellArgs [ + "-O=debug" + "--split-verilog" + "--preserve-values=named" + "--lowering-options=verifLabels,omitVersionComment" + "--strip-debug-info" + ]; +in +stdenvNoCC.mkDerivation { + name = "t1rocket-rtl"; + nativeBuildInputs = [ circt ]; + + buildCommand = '' + mkdir -p $out + + firtool ${mlirbc} ${mfcArgs} -o $out + ''; +} diff --git a/t1rocketemu/nix/verilated-c-lib.nix b/t1rocketemu/nix/verilated-c-lib.nix new file mode 100644 index 000000000..eb5b4127f --- /dev/null +++ b/t1rocketemu/nix/verilated-c-lib.nix @@ -0,0 +1,78 @@ +{ lib +, fetchgit +, stdenv +, rocketv-rtl +, verilator +, enable-trace ? 
true +, zlib +}: + +let + rocket-chip-v-src = fetchgit { + url = "https://github.com/chipsalliance/rocket-chip.git"; + rev = "833385404d9c722bdfad3e453c19a3ac6f40dbf0"; + fetchSubmodules = false; + sparseCheckout = [ + "src/main/resources/vsrc" + ]; + hash = "sha256-CUq9VDwb7ZtclosgOWfDZMOpH+U/yBjL5CNiXZRiB80="; + }; +in +stdenv.mkDerivation { + name = "t1-rocketv-verilated"; + + src = rocketv-rtl; + + nativeBuildInputs = [ verilator ]; + + propagatedBuildInputs = lib.optionals enable-trace [ zlib ]; + + env.rocketChipVSrc = "${rocket-chip-v-src}/src/main/resources/vsrc/"; + + buildPhase = '' + runHook preBuild + + echo "[nix] running verilator" + # FIXME: fix all the warning and remove -Wno- flag here + verilator \ + -I"$rocketChipVSrc" \ + ${lib.optionalString enable-trace "--trace-fst"} \ + --timing \ + --threads 8 \ + --threads-max-mtasks 8000 \ + -O1 \ + -Wno-WIDTHEXPAND \ + -Wno-LATCH \ + --cc TestBench + + echo "[nix] building verilated C lib" + + # backup srcs + mkdir -p $out/share + cp -r obj_dir $out/share/verilated_src + + rm $out/share/verilated_src/*.dat + + # We can't use -C here because VTestBench.mk is generated with relative path + cd obj_dir + make -j "$NIX_BUILD_CORES" -f VTestBench.mk libVTestBench + + runHook postBuild + ''; + + hardeningDisable = [ "fortify" ]; + + passthru = { + inherit enable-trace rocket-chip-v-src; + }; + + installPhase = '' + runHook preInstall + + mkdir -p $out/include $out/lib + cp *.h $out/include + cp *.a $out/lib + + runHook postInstall + ''; +} From dbc55c90585c0f6a0afb3fc4d6c119455f392d3d Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 5 Aug 2024 01:12:12 +0800 Subject: [PATCH 129/140] [t1rocket] add missing dependencies and fix some bugs [t1rocket] add timeout check [t1rocket] use t1rocket_cosim_init instead of cosim_init to expose link bug --- t1rocketemu/Cargo.lock | 2 + t1rocketemu/default.nix | 2 +- t1rocketemu/online_dpi/Cargo.toml | 2 + t1rocketemu/online_dpi/src/dpi.rs | 33 +++++++++----- 
t1rocketemu/online_dpi/src/drive.rs | 71 +++++++++++++++++++++++++---- t1rocketemu/src/TestBench.scala | 23 +++++++--- t1rocketemu/test_common/src/lib.rs | 6 +-- 7 files changed, 110 insertions(+), 29 deletions(-) diff --git a/t1rocketemu/Cargo.lock b/t1rocketemu/Cargo.lock index 9e6740f4a..4eea84c63 100644 --- a/t1rocketemu/Cargo.lock +++ b/t1rocketemu/Cargo.lock @@ -288,12 +288,14 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" name = "online_dpi" version = "0.1.0" dependencies = [ + "anyhow", "clap", "common", "elf", "hex", "spike_rs", "tracing", + "tracing-subscriber", ] [[package]] diff --git a/t1rocketemu/default.nix b/t1rocketemu/default.nix index ae841da2c..6ca9fad04 100644 --- a/t1rocketemu/default.nix +++ b/t1rocketemu/default.nix @@ -13,7 +13,7 @@ lib.makeScope newScope (scope: rec { verilated-c-lib = scope.callPackage ./nix/verilated-c-lib.nix { }; emu = rustPlatform.buildRustPackage { - name = "rocketemu"; + name = "t1rocketemu"; src = with lib.fileset; toSource { root = ./.; diff --git a/t1rocketemu/online_dpi/Cargo.toml b/t1rocketemu/online_dpi/Cargo.toml index 6a6db720a..5d2cb5f5d 100644 --- a/t1rocketemu/online_dpi/Cargo.toml +++ b/t1rocketemu/online_dpi/Cargo.toml @@ -10,6 +10,8 @@ common = { path = "../test_common" } spike_rs = { path = "../spike_rs" } clap = { workspace = true } tracing = { workspace = true } +tracing-subscriber = { workspace = true } +anyhow = { workspace = true } elf = "0.7.4" hex = "0.4.3" diff --git a/t1rocketemu/online_dpi/src/dpi.rs b/t1rocketemu/online_dpi/src/dpi.rs index b977b690d..0e88d0265 100644 --- a/t1rocketemu/online_dpi/src/dpi.rs +++ b/t1rocketemu/online_dpi/src/dpi.rs @@ -38,7 +38,7 @@ unsafe fn load_from_payload( payload: &*const SvBitVecVal, data_width: usize, dlen: usize, -) -> (&[u8], &[u8]) { +) -> (Vec, &[u8]) { let src = *payload as *mut u8; let data_width_in_byte = dlen / 8; let strb_width_in_byte = dlen / data_width; @@ -67,7 +67,7 @@ unsafe fn load_from_payload( 
hex::encode(data), ); - (&masks, data) + (masks, data) } //---------------------- @@ -100,8 +100,8 @@ unsafe extern "C" fn axi_write_highBandwidthAXI( let mut driver = DPI_TARGET.lock().unwrap(); let driver = driver.as_mut().unwrap(); let data_width = 32; // TODO: get from driver - let (strobe, data) = load_from_payload(&payload, 32, driver.dlen); - driver.axi_write_high_bandwidth(awaddr as u32, awsize as u64, strobe, data); + let (strobe, data) = load_from_payload(&payload, 32, driver.dlen as usize); + driver.axi_write_high_bandwidth(awaddr as u32, awsize as u64, &strobe, data); } /// evaluate at AR fire at corresponding channel_id. @@ -158,7 +158,7 @@ unsafe extern "C" fn axi_write_indexedAccessAXI( let driver = driver.as_mut().unwrap(); let data_width = 32; // TODO: get from driver let (strobe, data) = load_from_payload(&payload, data_width, 32); - driver.axi_write_indexed_access(awaddr as u32, awsize as u64, strobe, data); + driver.axi_write_indexed_access(awaddr as u32, awsize as u64, &strobe, data); } /// evaluate at AR fire at corresponding channel_id. 
@@ -214,7 +214,7 @@ unsafe extern "C" fn axi_write_loadStoreAXI( let driver = driver.as_mut().unwrap(); let data_width = 32; // TODO: get from sim let (strobe, data) = load_from_payload(&payload, data_width, driver.dlen as usize); - driver.axi_write_load_store(awaddr as u32, awsize, strobe, data); + driver.axi_write_load_store(awaddr as u32, awsize as u64, &strobe, data); } #[no_mangle] @@ -241,7 +241,7 @@ unsafe extern "C" fn axi_read_loadStoreAXI( let mut driver = DPI_TARGET.lock().unwrap(); let driver = driver.as_mut().unwrap(); let response = driver.axi_read_load_store(araddr as u32, arsize as u64); - fill_axi_read_payload(payload, driver.dlen, &response.data); + fill_axi_read_payload(payload, driver.dlen, &response); } #[no_mangle] @@ -268,25 +268,36 @@ unsafe extern "C" fn axi_read_instructionFetchAXI( let mut driver = DPI_TARGET.lock().unwrap(); let driver = driver.as_mut().unwrap(); let response = driver.axi_read_instruction_fetch(araddr as u32, arsize as u64); - fill_axi_read_payload(payload, driver.dlen, &response.data); + fill_axi_read_payload(payload, driver.dlen, &response); } #[no_mangle] -unsafe extern "C" fn cosim_init() { +unsafe extern "C" fn t1rocket_cosim_init() { let args = OfflineArgs::parse(); args.common_args.setup_logger().unwrap(); - let scope = SvScope::get_current().expect("failed to get scope in cosim_init"); + let scope = SvScope::get_current().expect("failed to get scope in t1rocket_cosim_init"); let driver = Box::new(Driver::new(scope, &args)); let mut dpi_target = DPI_TARGET.lock().unwrap(); assert!( dpi_target.is_none(), - "cosim_init should be called only once" + "t1rocket_cosim_init should be called only once" ); *dpi_target = Some(driver); } +/// evaluate at every 1024 cycles, return reason = 0 to continue simulation, +/// other value is used as error code. 
+#[no_mangle] +unsafe extern "C" fn cosim_watchdog(reason: *mut c_char) { + // watchdog dpi call would be called before initialization, guard on null target + let mut driver = DPI_TARGET.lock().unwrap(); + if let Some(driver) = driver.as_mut() { + *reason = driver.watchdog() as c_char + } +} + #[no_mangle] unsafe extern "C" fn get_resetvector(target: *mut (), resetvector: *mut c_longlong) { if !target.is_null() { diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs index 2b996eecf..bc022aafb 100644 --- a/t1rocketemu/online_dpi/src/drive.rs +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -1,17 +1,23 @@ +use crate::dpi::*; +use crate::get_t; +use crate::svdpi::SvScope; +use crate::OfflineArgs; + +use anyhow::Context; use common::MEM_SIZE; use elf::{ abi::{EM_RISCV, ET_EXEC, PT_LOAD, STT_FUNC}, endian::LittleEndian, ElfStream, }; -use spike_rs::util::load_elf_to_buffer; +use std::collections::HashMap; +use std::os::unix::fs::FileExt; +use std::{ + fs, + path::{Path, PathBuf}, +}; use tracing::{debug, error, info, trace}; -use crate::dpi::*; -use crate::get_t; -use crate::svdpi::SvScope; -use crate::OfflineArgs; - struct ShadowMem { mem: Vec, } @@ -90,8 +96,18 @@ impl ShadowMem { } } +#[derive(Debug)] +#[allow(dead_code)] +pub struct FunctionSym { + #[allow(dead_code)] + pub(crate) name: String, + #[allow(dead_code)] + pub(crate) info: u8, +} +pub type FunctionSymTab = HashMap; + pub(crate) struct Driver { - // SvScope from t1_cosim_init + // SvScope from t1rocket_cosim_init scope: SvScope, #[cfg(feature = "trace")] @@ -106,6 +122,9 @@ pub(crate) struct Driver { pub(crate) dlen: u32, pub(crate) e_entry: u64, + timeout: u64, + last_commit_cycle: u64, + shadow_mem: ShadowMem, } @@ -147,7 +166,7 @@ impl Driver { let (dump_start, dump_end) = parse_range(&args.dump_range); // pass e_entry to rocket - let (e_entry, shadow_mem, fn_sym_tab) = + let (e_entry, shadow_mem, _fn_sym_tab) = Self::load_elf(&args.common_args.elf_file).expect("fail 
creating simulator"); Self { @@ -165,6 +184,9 @@ impl Driver { dlen: args.common_args.dlen, e_entry, + timeout: args.timeout, + last_commit_cycle: 0, + shadow_mem, } } @@ -330,6 +352,39 @@ impl Driver { AxiReadPayload { data } } + pub(crate) fn watchdog(&mut self) -> u8 { + const WATCHDOG_CONTINUE: u8 = 0; + const WATCHDOG_TIMEOUT: u8 = 1; + + let tick = get_t(); + if tick - self.last_commit_cycle > self.timeout { + error!( + "[{}] watchdog timeout (last_commit_cycle={})", + get_t(), + self.last_commit_cycle + ); + WATCHDOG_TIMEOUT + } else { + #[cfg(feature = "trace")] + if self.dump_end != 0 && tick > self.dump_end { + info!( + "[{tick}] run to dump end, exiting (last_commit_cycle={})", + self.last_commit_cycle + ); + return WATCHDOG_TIMEOUT; + } + + #[cfg(feature = "trace")] + if !self.dump_started && tick >= self.dump_start { + self.start_dump_wave(); + self.dump_started = true; + } + + trace!("[{}] watchdog continue", get_t()); + WATCHDOG_CONTINUE + } + } + #[cfg(feature = "trace")] fn start_dump_wave(&mut self) { dump_wave(self.scope, &self.wave_path); diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index ac17980f6..dadb21c5b 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -34,9 +34,9 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil |`endif | endfunction; | - | import "DPI-C" context function void cosim_init(); + | import "DPI-C" context function void t1rocket_cosim_init(); | initial begin - | cosim_init(); + | t1rocket_cosim_init(); | clock = 1'b0; | reset = 1'b1; | end @@ -56,9 +56,21 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil dut.io.clock := clock dut.io.reset := reset - val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + // control simulation + val simulationTime: UInt = RegInit(0.U(64.W)) simulationTime := simulationTime + 1.U + // TODO: this initial way cannot happen before reset... 
+ val initFlag = RegInit(false.B) + when(!initFlag) { + initFlag := true.B + printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") + } + val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } + // get resetVector from simulator dut.io.resetVector := RawUnclockedNonVoidFunctionCall("get_resetvector", Const(UInt(64.W)))(simulationTime === 0.U) @@ -68,9 +80,6 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil dut.io.msip := 0.U dut.io.meip := 0.U dut.io.buserror := 0.U - dut.io.lip := 0.U - dut.io.wfi := 0.U - dut.io.halt := 0.U // memory driver Seq( @@ -146,4 +155,6 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil loadStoreAgent.io.channelId := 3.U loadStoreAgent.io.gateRead := false.B loadStoreAgent.io.gateWrite := false.B + + // probes } diff --git a/t1rocketemu/test_common/src/lib.rs b/t1rocketemu/test_common/src/lib.rs index e9690d3a2..18f2a4d42 100644 --- a/t1rocketemu/test_common/src/lib.rs +++ b/t1rocketemu/test_common/src/lib.rs @@ -24,15 +24,15 @@ pub struct CommonArgs { pub log_level: String, /// vlen config - #[arg(long, default_value = env!("DESIGN_VLEN"))] + #[arg(long)] pub vlen: u32, /// dlen config - #[arg(long, default_value = env!("DESIGN_DLEN"))] + #[arg(long)] pub dlen: u32, /// ISA config - #[arg(long, default_value = env!("SPIKE_ISA_STRING"))] + #[arg(long)] pub set: String, } From 29e764869e8959434420ce6a68be42dd949708be Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 5 Aug 2024 17:44:02 +0800 Subject: [PATCH 130/140] [t1rocketemu] fix wrong mlirbc Signed-off-by: Avimitin --- t1rocketemu/configs/default.json | 32 +++++++++++++++++--------------- t1rocketemu/nix/mlirbc.nix | 5 ++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/t1rocketemu/configs/default.json 
b/t1rocketemu/configs/default.json index 14aa529a3..bdb6154f2 100644 --- a/t1rocketemu/configs/default.json +++ b/t1rocketemu/configs/default.json @@ -1,17 +1,19 @@ { - "instructionSets": ["rv32_i", "rv_a", "rv_v", "Zve32x", "zvl1024b", "rv_c"], - "cacheBlockBytes": 32, - "nPMPs": 8, - "cacheable": "b1???????????????????????????????", - "sideEffects": "b000?????????????????????????????", - "dcacheNSets": 64, - "dcacheNWays": 4, - "dcacheRowBits": 32, - "iCacheNSets": 32, - "iCacheNWays": 4, - "iCachePrefetch": false, - "dLen": 256, - "vrfBankSize": 2, - "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w" + "parameter": { + "instructionSets": ["rv32_i", "rv_a", "rv_v", "Zve32x", "zvl1024b", "rv_c"], + "cacheBlockBytes": 32, + "nPMPs": 8, + "cacheable": "b1???????????????????????????????", + "sideEffects": "b000?????????????????????????????", + "dcacheNSets": 64, + "dcacheNWays": 4, + "dcacheRowBits": 32, + "iCacheNSets": 32, + "iCacheNWays": 4, + "iCachePrefetch": false, + "dLen": 256, + "vrfBankSize": 2, + "vrfRamType": "org.chipsalliance.t1.rtl.vrf.RamType.p0rp1w" + }, + "generator": "org.chipsalliance.t1.tile.T1RocketTile" } - diff --git a/t1rocketemu/nix/mlirbc.nix b/t1rocketemu/nix/mlirbc.nix index 73a1b327b..14573c9b8 100644 --- a/t1rocketemu/nix/mlirbc.nix +++ b/t1rocketemu/nix/mlirbc.nix @@ -4,16 +4,15 @@ , circt , elaborator -, config }: stdenvNoCC.mkDerivation { - name = "t1-rocketv-elaborated.mlirbc"; + name = "t1rocketemu-elaborated.mlirbc"; nativeBuildInputs = [ elaborator espresso circt ]; buildCommand = '' mkdir elaborate - elaborator rocketemu --target-dir elaborate --t1rocket-config ${config} + elaborator t1rocketemu --target-dir elaborate --t1rocket-config ${../configs/default.json} firtool elaborate/*.fir \ --annotation-file elaborate/*.anno.json \ --emit-bytecode \ From 36d5c7167d09b332bdcdfb24beeb0f811a90817f Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 5 Aug 2024 18:03:37 +0800 Subject: [PATCH 131/140] [nix] fix wrong RTL 
reference Signed-off-by: Avimitin --- t1rocketemu/nix/verilated-c-lib.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t1rocketemu/nix/verilated-c-lib.nix b/t1rocketemu/nix/verilated-c-lib.nix index eb5b4127f..147747ad4 100644 --- a/t1rocketemu/nix/verilated-c-lib.nix +++ b/t1rocketemu/nix/verilated-c-lib.nix @@ -1,7 +1,7 @@ { lib , fetchgit , stdenv -, rocketv-rtl +, rtl , verilator , enable-trace ? true , zlib @@ -19,9 +19,9 @@ let }; in stdenv.mkDerivation { - name = "t1-rocketv-verilated"; + name = "t1rocket-verilated"; - src = rocketv-rtl; + src = rtl; nativeBuildInputs = [ verilator ]; From 9dd42cfaf22ac93c8d30a4661fa8fdec651f035f Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 5 Aug 2024 21:11:11 +0800 Subject: [PATCH 132/140] [t1rocket] add online difftest [t1rocket] fix wrong get_resetvector [t1rocket] fix unsafe extern "C" function signatures in dpi.rs [t1rocket] update axi_read_load_store function to use correct size parameter [t1rocket] add probes [t1rocket] add last commit cycle during axi to control timeout [t1rocket] add vrf score board check [t1rocket] refactor event's name in offline difftest --- t1/src/T1.scala | 7 +- t1rocket/src/T1RocketTile.scala | 30 ++++++- t1rocketemu/offline/src/difftest.rs | 4 +- t1rocketemu/offline/src/json_events.rs | 41 ++++----- t1rocketemu/online_dpi/src/dpi.rs | 24 +++--- t1rocketemu/online_dpi/src/drive.rs | 23 ++--- t1rocketemu/src/TestBench.scala | 111 ++++++++++++++++++++++++- 7 files changed, 182 insertions(+), 58 deletions(-) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 5a36bb040..7423d557a 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -307,6 +307,8 @@ class T1Probe(parameter: T1Parameter) extends Bundle { // probes val lsuProbe: LSUProbe = new LSUProbe(parameter.lsuParameters) val laneProbes: Vec[LaneProbe] = Vec(parameter.laneNumber, new LaneProbe(parameter.laneParam)) + val issue: ValidIO[UInt] = Valid(UInt(param.instructionIndexBits.W)) + val retire: 
ValidIO[UInt] = Valid(UInt(param.xLen.W)) } class T1Interface(parameter: T1Parameter) extends Record { @@ -1731,7 +1733,10 @@ class T1(val parameter: T1Parameter) probeWire.responseCounter := responseCounter probeWire.laneProbes.zip(laneVec).foreach { case (p, l) => p := probe.read(l.laneProbe) } probeWire.lsuProbe := probe.read(lsu.lsuProbe) - + probeWire.issue.valid := io.issue.fire + probeWire.issue.bits := instructionCounter + probeWire.retire.valid := io.retire.rd.valid + probeWire.retire.bits := io.retire.rd.bits.rdData // new V Request from core // val requestValidProbe: Bool = IO(Output(Probe(Bool()))) diff --git a/t1rocket/src/T1RocketTile.scala b/t1rocket/src/T1RocketTile.scala index 452f76084..321f7b8cd 100644 --- a/t1rocket/src/T1RocketTile.scala +++ b/t1rocket/src/T1RocketTile.scala @@ -7,13 +7,16 @@ import chisel3.experimental.hierarchy.{Instance, Instantiate} import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter} import chisel3.util.experimental.BitSet import chisel3.util.log2Ceil +import chisel3.probe.{Probe, ProbeValue, define} import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable} -import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter} +import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter, RocketProbe} import org.chipsalliance.rvdecoderdb.Instruction import org.chipsalliance.t1.rtl.decoder.T1CustomInstruction import org.chipsalliance.t1.rtl.vrf.RamType import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw} -import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, 
LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, LogicParam, MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter} +import org.chipsalliance.t1.rtl.lsu.LSUProbe +import org.chipsalliance.t1.rtl.vrf.VRFProbe +import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, LogicParam, MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter, T1Probe, LaneProbe} object T1RocketTileParameter { implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default @@ -452,6 +455,22 @@ class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { val highBandwidthAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter) val highOutstandingAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HightOutstandingParameter) + + val rocketProbe: RocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) + val t1Probe: T1Probe = Output(Probe(new T1Probe(parameter.t1Parameter))) + val lsuProbe: LSUProbe = Output(Probe(new LSUProbe(parameter.t1Parameter.lsuParameters))) + val laneProbes: Vec[LaneProbe] = Vec(parameter.t1Parameter.laneNumber, Output(Probe(new LaneProbe( + parameter.t1Parameter.laneParam.chainingSize, + parameter.t1Parameter.laneParam.instructionIndexBits + ))) + ) + val laneVrfProbes: Vec[VRFProbe] = Vec(parameter.t1Parameter.laneNumber, Output(Probe(new VRFProbe( + parameter.t1Parameter.laneParam.vrfParam.regNumBits, + parameter.t1Parameter.laneParam.vrfParam.vrfOffsetBits, + parameter.t1Parameter.laneParam.vrfParam.instructionIndexBits, + parameter.t1Parameter.laneParam.vrfParam.datapathWidth + ))) + ) } class T1RocketTile(val parameter: T1RocketTileParameter) @@ -540,4 
+559,11 @@ class T1RocketTile(val parameter: T1RocketTileParameter) t1.io.reset := io.reset io.highBandwidthAXI <> t1.io.highBandwidthLoadStorePort io.highOutstandingAXI <> t1.io.indexedLoadStorePort + + // probe + define(io.rocketProbe, rocket.io.rocketProbe) + define(io.t1Probe, t1.io.t1Probe) + define(io.lsuProbe, t1.io.lsuProbe) + io.laneProbes.zipWithIndex.foreach { case (io, index) => define(io, t1.io.laneProbes(index)) } + io.laneVrfProbes.zipWithIndex.foreach { case (io, index) => define(io, t1.io.laneVrfProbes(index)) } } diff --git a/t1rocketemu/offline/src/difftest.rs b/t1rocketemu/offline/src/difftest.rs index 66a0173d9..3789297f5 100644 --- a/t1rocketemu/offline/src/difftest.rs +++ b/t1rocketemu/offline/src/difftest.rs @@ -77,9 +77,9 @@ impl Difftest { self.runner.cycle = *cycle; self.runner.check_rd(&CheckRdEvent { data: *data, issue_idx: *issue_idx, cycle: *cycle }) } - JsonEvents::VrfScoreboardReport { count, issue_idx, cycle } => { + JsonEvents::VrfScoreboard { count, issue_idx, cycle } => { self.runner.cycle = *cycle; - self.runner.vrf_scoreboard_report(&VrfScoreboardReportEvent { + self.runner.vrf_scoreboard(&VrfScoreboardEvent { count: *count, issue_idx: *issue_idx, cycle: *cycle, diff --git a/t1rocketemu/offline/src/json_events.rs b/t1rocketemu/offline/src/json_events.rs index 24652f04d..3b19ddad5 100644 --- a/t1rocketemu/offline/src/json_events.rs +++ b/t1rocketemu/offline/src/json_events.rs @@ -4,16 +4,7 @@ use serde::{Deserialize, Deserializer}; use spike_rs::spike_event::LSU_IDX_DEFAULT; use tracing::{debug, info}; -#[derive(Deserialize, Debug, PartialEq, Clone)] -pub enum Opcode { - PutFullData = 0, - PutPartialData = 1, - Get = 4, - // AccessAckData = 0, - // AccessAck = 0, -} - -fn bigint_to_vec_u8<'de, D>(deserializer: D) -> Result, D::Error> +fn str_to_vec_u8<'de, D>(deserializer: D) -> Result, D::Error> where D: Deserializer<'de>, { @@ -23,7 +14,7 @@ where Ok(bigint.to_bytes_le()) } -fn bigint_to_vec_bool<'de, D>(deserializer: D) -> 
Result, D::Error> +fn str_to_vec_bool<'de, D>(deserializer: D) -> Result, D::Error> where D: Deserializer<'de>, { @@ -36,7 +27,7 @@ where Ok(bools) } -fn hex_to_u32<'de, D>(deserializer: D) -> Result +fn str_to_u32<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { @@ -73,30 +64,30 @@ pub(crate) enum JsonEvents { issue_idx: u8, vd: u32, offset: u32, - #[serde(deserialize_with = "bigint_to_vec_bool", default)] + #[serde(deserialize_with = "str_to_vec_bool", default)] mask: Vec, - #[serde(deserialize_with = "bigint_to_vec_u8", default)] + #[serde(deserialize_with = "str_to_vec_u8", default)] data: Vec, lane: u32, cycle: u64, }, MemoryWrite { - #[serde(deserialize_with = "bigint_to_vec_bool", default)] + #[serde(deserialize_with = "str_to_vec_bool", default)] mask: Vec, - #[serde(deserialize_with = "bigint_to_vec_u8", default)] + #[serde(deserialize_with = "str_to_vec_u8", default)] data: Vec, lsu_idx: u8, - #[serde(deserialize_with = "hex_to_u32", default)] + #[serde(deserialize_with = "str_to_u32", default)] address: u32, cycle: u64, }, CheckRd { - #[serde(deserialize_with = "hex_to_u32", default)] + #[serde(deserialize_with = "str_to_u32", default)] data: u32, issue_idx: u8, cycle: u64, }, - VrfScoreboardReport { + VrfScoreboard { count: u32, issue_idx: u8, cycle: u64, @@ -131,7 +122,7 @@ pub struct MemoryWriteEvent { pub cycle: u64, } -pub struct VrfScoreboardReportEvent { +pub struct VrfScoreboardEvent { pub count: u32, pub issue_idx: u8, pub cycle: u64, @@ -150,7 +141,7 @@ pub(crate) trait JsonEventRunner { fn peek_vrf_write(&mut self, vrf_write: &VrfWriteEvent) -> anyhow::Result<()>; - fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()>; + fn vrf_scoreboard(&mut self, vrf_scoreboard: &VrfScoreboardEvent) -> anyhow::Result<()>; fn peek_memory_write(&mut self, memory_write: &MemoryWriteEvent) -> anyhow::Result<()>; @@ -323,10 +314,10 @@ impl JsonEventRunner for SpikeRunner { panic!("[{cycle}] cannot find se 
with instruction lsu_idx={lsu_idx}") } - fn vrf_scoreboard_report(&mut self, report: &VrfScoreboardReportEvent) -> anyhow::Result<()> { - let count = report.count; - let issue_idx = report.issue_idx; - let cycle = report.cycle; + fn vrf_scoreboard(&mut self, vrf_scoreboard: &VrfScoreboardEvent) -> anyhow::Result<()> { + let count = vrf_scoreboard.count; + let issue_idx = vrf_scoreboard.issue_idx; + let cycle = vrf_scoreboard.cycle; let mut should_retire: Option = None; diff --git a/t1rocketemu/online_dpi/src/dpi.rs b/t1rocketemu/online_dpi/src/dpi.rs index 0e88d0265..82c33ebe3 100644 --- a/t1rocketemu/online_dpi/src/dpi.rs +++ b/t1rocketemu/online_dpi/src/dpi.rs @@ -134,7 +134,7 @@ unsafe extern "C" fn axi_read_highBandwidthAXI( /// evaluate after AW and W is finished at corresponding channel_id. #[no_mangle] -unsafe extern "C" fn axi_write_indexedAccessAXI( +unsafe extern "C" fn axi_write_highOutstandingAXI( channel_id: c_longlong, awid: c_longlong, awaddr: c_longlong, @@ -150,7 +150,7 @@ unsafe extern "C" fn axi_write_indexedAccessAXI( payload: *const SvBitVecVal, ) { debug!( - "axi_write_indexed (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ + "axi_write_high_outstanding (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ awprot={awprot}, awqos={awqos}, awregion={awregion})" ); @@ -158,12 +158,12 @@ unsafe extern "C" fn axi_write_indexedAccessAXI( let driver = driver.as_mut().unwrap(); let data_width = 32; // TODO: get from driver let (strobe, data) = load_from_payload(&payload, data_width, 32); - driver.axi_write_indexed_access(awaddr as u32, awsize as u64, &strobe, data); + driver.axi_write_high_outstanding(awaddr as u32, awsize as u64, &strobe, data); } /// evaluate at AR fire at corresponding channel_id. 
#[no_mangle] -unsafe extern "C" fn axi_read_indexedAccessAXI( +unsafe extern "C" fn axi_read_highOutstandingAXI( channel_id: c_longlong, arid: c_longlong, araddr: c_longlong, @@ -179,19 +179,18 @@ unsafe extern "C" fn axi_read_indexedAccessAXI( payload: *mut SvBitVecVal, ) { debug!( - "axi_read_indexed (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ + "axi_read_high_outstanding (channel_id={channel_id}, arid={arid}, araddr={araddr:#x}, \ arlen={arlen}, arsize={arsize}, arburst={arburst}, arlock={arlock}, arcache={arcache}, \ arprot={arprot}, arqos={arqos}, arregion={arregion})" ); let mut driver = DPI_TARGET.lock().unwrap(); let driver = driver.as_mut().unwrap(); - let response = driver.axi_read_indexed(araddr as u32, arsize as u64); + let response = driver.axi_read_high_outstanding(araddr as u32, arsize as u64); fill_axi_read_payload(payload, driver.dlen, &response); } #[no_mangle] unsafe extern "C" fn axi_write_loadStoreAXI( - target: *mut (), channel_id: c_longlong, awid: c_longlong, awaddr: c_longlong, @@ -207,7 +206,7 @@ unsafe extern "C" fn axi_write_loadStoreAXI( ) { debug!( "axi_write_loadStore (channel_id={channel_id}, awid={awid}, awaddr={awaddr:#x}, \ - awlen={awlen}, awsize=2^{awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ + awlen={awlen}, awsize={awsize}, awburst={awburst}, awlock={awlock}, awcache={awcache}, \ awprot={awprot}, awqos={awqos}, awregion={awregion})" ); let mut driver = DPI_TARGET.lock().unwrap(); @@ -219,7 +218,6 @@ unsafe extern "C" fn axi_write_loadStoreAXI( #[no_mangle] unsafe extern "C" fn axi_read_loadStoreAXI( - target: *mut (), channel_id: c_longlong, arid: c_longlong, araddr: c_longlong, @@ -246,7 +244,6 @@ unsafe extern "C" fn axi_read_loadStoreAXI( #[no_mangle] unsafe extern "C" fn axi_read_instructionFetchAXI( - target: *mut (), channel_id: c_longlong, arid: c_longlong, araddr: c_longlong, @@ -299,10 +296,9 @@ unsafe extern "C" fn cosim_watchdog(reason: *mut c_char) { } #[no_mangle] -unsafe 
extern "C" fn get_resetvector(target: *mut (), resetvector: *mut c_longlong) { - if !target.is_null() { - let mut driver = DPI_TARGET.lock().unwrap(); - let driver = driver.as_mut().unwrap(); +unsafe extern "C" fn get_resetvector(resetvector: *mut c_longlong) { + let mut driver = DPI_TARGET.lock().unwrap(); + if let Some(driver) = driver.as_mut() { *resetvector = driver.e_entry as c_longlong } } diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs index bc022aafb..7e71114ac 100644 --- a/t1rocketemu/online_dpi/src/drive.rs +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -260,6 +260,7 @@ impl Driver { let size = 1 << arsize; let data = self.shadow_mem.read_mem_axi(addr, size, self.dlen / 8); let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); trace!( "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", get_t() @@ -275,28 +276,29 @@ impl Driver { data: &[u8], ) { let size = 1 << awsize; - self.shadow_mem.write_mem_axi(addr, size, self.dlen / 8, &strobe, data); let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); trace!( "[{}] axi_write_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})", get_t() ); } - pub(crate) fn axi_read_indexed(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { + pub(crate) fn axi_read_high_outstanding(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { let size = 1 << arsize; assert!(size <= 4); let data = self.shadow_mem.read_mem_axi(addr, size, 4); let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); trace!( - "[{}] axi_read_indexed (addr={addr:#x}, size={size}, data={data_hex})", + "[{}] axi_read_high_outstanding (addr={addr:#x}, size={size}, data={data_hex})", get_t() ); AxiReadPayload { data } } - pub(crate) fn axi_write_indexed_access( + pub(crate) fn axi_write_high_outstanding( &mut self, addr: u32, awsize: u64, @@ -306,17 +308,18 @@ impl Driver { let size = 1 << awsize; self.shadow_mem.write_mem_axi(addr, size, 
4, strobe, data); let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); trace!( - "[{}] axi_write_indexed_access (addr={addr:#x}, size={size}, data={data_hex})", + "[{}] axi_write_high_outstanding (addr={addr:#x}, size={size}, data={data_hex})", get_t() ); } pub(crate) fn axi_read_load_store(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { let size = 1 << arsize; - assert!(size <= 4); - let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 32); let data_hex = hex::encode(&data); + self.last_commit_cycle = get_t(); trace!( "[{}] axi_read_load_store (addr={addr:#x}, size={size}, data={data_hex})", get_t() @@ -332,8 +335,9 @@ impl Driver { data: &[u8], ) { let size = 1 << awsize; - self.shadow_mem.write_mem_axi(addr, size, 4, strobe, data); + self.shadow_mem.write_mem_axi(addr, size, 32, strobe, data); let data_hex = hex::encode(data); + self.last_commit_cycle = get_t(); trace!( "[{}] axi_write_load_store (addr={addr:#x}, size={size}, data={data_hex})", get_t() @@ -342,8 +346,7 @@ impl Driver { pub(crate) fn axi_read_instruction_fetch(&mut self, addr: u32, arsize: u64) -> AxiReadPayload { let size = 1 << arsize; - assert!(size <= 4); - let data = self.shadow_mem.read_mem_axi(addr, size, 4); + let data = self.shadow_mem.read_mem_axi(addr, size, 32); let data_hex = hex::encode(&data); trace!( "[{}] axi_read_instruction_fetch (addr={addr:#x}, size={size}, data={data_hex})", diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index dadb21c5b..aa431fedc 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -7,7 +7,7 @@ import chisel3._ import chisel3.experimental.{BaseModule, ExtModule, SerializableModuleGenerator} import chisel3.experimental.dataview.DataViewable import chisel3.util.circt.dpi.RawUnclockedNonVoidFunctionCall -import chisel3.util.HasExtModuleInline +import chisel3.util.{HasExtModuleInline, PopCount, UIntToOH, Valid} import 
org.chipsalliance.amba.axi4.bundle._ import org.chipsalliance.t1.t1rocketemu.dpi._ import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter} @@ -60,13 +60,13 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil val simulationTime: UInt = RegInit(0.U(64.W)) simulationTime := simulationTime + 1.U - // TODO: this initial way cannot happen before reset... - val initFlag = RegInit(false.B) + // this initial way cannot happen before reset + val initFlag: Bool = RegInit(false.B) when(!initFlag) { initFlag := true.B printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") } - val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + val watchdog: UInt = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) when(watchdog =/= 0.U) { stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") } @@ -157,4 +157,107 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil loadStoreAgent.io.gateWrite := false.B // probes + val rocketProbe = probe.read(dut.io.rocketProbe).suggestName(s"rocketProbe") + val t1Probe = probe.read(dut.io.t1Probe).suggestName(s"t1Probe") + val lsuProbe = probe.read(dut.io.lsuProbe).suggestName(s"lsuProbe") + val laneProbes = dut.io.laneProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") + wire := probe.read(p) + wire + } + val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") + wire := probe.read(p) + wire + } + val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") + val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") + + // output the probes + // rocket reg write + when(rocketProbe.rfWen)( + printf( + 
cf"""{"event":"RegWrite","idx":${rocketProbe.rfWaddr},"data":"${rocketProbe.rfWdata}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 vrf write + laneVrfProbes.zipWithIndex.foreach { + case (lane, i) => + when(lane.valid)( + printf( + cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + ) + ) + } + + // t1 memory write from store unit + when(storeUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 memory write from other unit + when(otherUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + + // t1 issue + when(t1Probe.issue.valid)( + printf(cf"""{"event":"Issue","idx":${t1Probe.issue.bits},"cycle":${simulationTime}}\n""") + ) + + // t1 retire + when(t1Probe.retire.valid)( + printf( + cf"""{"event":"CheckRd","data":"${t1Probe.retire.bits}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" + ) + ) + + // t1 lsu enq + when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) + + // t1 vrf scoreboard + val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * generator.parameter.t1Parameter.chainingSize) { _ => + RegInit(0.U.asTypeOf(Valid(UInt(16.W)))) + } + vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) + val instructionValid = + (laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ + lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) + val scoreboardEnq = + 
Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * generator.parameter.t1Parameter.chainingSize).W)) + vrfWriteScoreboard.zipWithIndex.foreach { + case (scoreboard, tag) => + val writeEnq: UInt = VecInit( + // vrf write from lane + laneProbes.flatMap(laneProbe => + laneProbe.slots.map(slot => slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR) + ) ++ laneProbes.flatMap(laneProbe => + laneProbe.crossWriteProbe.map(cp => cp.bits.writeTag === tag.U && cp.valid && cp.bits.writeMask.orR) + ) ++ + // vrf write from lsu + lsuProbe.slots.map(slot => slot.dataInstruction === tag.U && slot.writeValid && slot.dataMask.orR) ++ + // vrf write from Sequencer + Some(t1Probe.writeQueueEnq.bits === tag.U && t1Probe.writeQueueEnq.valid && t1Probe.writeQueueEnqMask.orR) + ).asUInt + // always equal to array index + scoreboard.bits := scoreboard.bits + PopCount(writeEnq) + when(scoreboard.valid && !instructionValid(tag)) { + printf( + cf"""{"event":"VrfScoreboard","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""" + ) + scoreboard.valid := false.B + } + when(scoreboardEnq(tag)) { + scoreboard.valid := true.B + assert(!scoreboard.valid) + scoreboard.bits := 0.U + } + } } From a96ba31e213cc8ab9b8097ad51bdba1992168bcc Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Wed, 7 Aug 2024 00:59:05 +0800 Subject: [PATCH 133/140] [t1rocket] add offline difftest --- t1rocketemu/offline/src/difftest.rs | 4 +++ t1rocketemu/offline/src/json_events.rs | 33 ++++++++++++++++++++- t1rocketemu/online_dpi/src/drive.rs | 5 +--- t1rocketemu/test_common/src/spike_runner.rs | 9 ++++++ 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/t1rocketemu/offline/src/difftest.rs b/t1rocketemu/offline/src/difftest.rs index 3789297f5..5e59ca60f 100644 --- a/t1rocketemu/offline/src/difftest.rs +++ b/t1rocketemu/offline/src/difftest.rs @@ -43,6 +43,10 @@ impl Difftest { self.runner.cycle = *cycle; Ok(()) } + JsonEvents::RegWrite { idx, data, cycle } => { 
+ self.runner.cycle = *cycle; + self.runner.peek_reg_write(&RegWriteEvent { idx: *idx, data: *data, cycle: *cycle }) + } JsonEvents::Issue { idx, cycle } => { self.runner.cycle = *cycle; self.runner.peek_issue(&IssueEvent { idx: *idx, cycle: *cycle }) diff --git a/t1rocketemu/offline/src/json_events.rs b/t1rocketemu/offline/src/json_events.rs index 3b19ddad5..585c5372a 100644 --- a/t1rocketemu/offline/src/json_events.rs +++ b/t1rocketemu/offline/src/json_events.rs @@ -52,6 +52,12 @@ pub(crate) enum JsonEvents { reason: u8, cycle: u64, }, + RegWrite { + idx: u8, + #[serde(deserialize_with = "str_to_u32", default)] + data: u32, + cycle: u64, + }, Issue { idx: u8, cycle: u64, @@ -94,6 +100,12 @@ pub(crate) enum JsonEvents { }, } +pub struct RegWriteEvent { + pub idx: u8, + pub data: u32, + pub cycle: u64, +} + pub struct IssueEvent { pub idx: u8, pub cycle: u64, @@ -135,6 +147,8 @@ pub struct CheckRdEvent { } pub(crate) trait JsonEventRunner { + fn peek_reg_write(&mut self, reg_write: &RegWriteEvent) -> anyhow::Result<()>; + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()>; fn update_lsu_idx(&mut self, lsu_enq: &LsuEnqEvent) -> anyhow::Result<()>; @@ -153,6 +167,23 @@ pub(crate) trait JsonEventRunner { } impl JsonEventRunner for SpikeRunner { + fn peek_reg_write(&mut self, reg_write: &RegWriteEvent) -> anyhow::Result<()> { + let cycle = reg_write.cycle; + let idx = reg_write.idx; + let data = reg_write.data; + + let se = self.find_reg_write(); + + info!( + "[{cycle}] RegWrite: inst ({}) check reg write idx={idx}, data={data:08x}", + se.describe_insn() + ); + + assert_eq!(idx as u32, se.rd_idx, "idx should be equal to se.rd_idx"); + assert_eq!(data, se.rd_bits, "data should be equal to se.rd_bits"); + Ok(()) + } + fn peek_issue(&mut self, issue: &IssueEvent) -> anyhow::Result<()> { self.find_v_se_to_issue(); // ensure the front of queue is a new un-issued se let se = self.commit_queue.front_mut().unwrap(); @@ -163,7 +194,7 @@ impl JsonEventRunner for 
SpikeRunner { se.issue_idx = issue.idx as u8; info!( - "[{}] SpikePeekIssue: issue_idx={}, pc={:#x}, inst={}", + "[{}] Issue: issue_idx={}, pc={:#x}, inst={}", issue.cycle, issue.idx, se.pc, se.disasm ); diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs index 7e71114ac..2b89fdca4 100644 --- a/t1rocketemu/online_dpi/src/drive.rs +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -12,10 +12,7 @@ use elf::{ }; use std::collections::HashMap; use std::os::unix::fs::FileExt; -use std::{ - fs, - path::{Path, PathBuf}, -}; +use std::{fs, path::Path}; use tracing::{debug, error, info, trace}; struct ShadowMem { diff --git a/t1rocketemu/test_common/src/spike_runner.rs b/t1rocketemu/test_common/src/spike_runner.rs index b9339be7a..3d8712708 100644 --- a/t1rocketemu/test_common/src/spike_runner.rs +++ b/t1rocketemu/test_common/src/spike_runner.rs @@ -123,6 +123,15 @@ impl SpikeRunner { event } + pub fn find_reg_write(&mut self) -> SpikeEvent { + loop { + let se = self.spike_step(); + if se.is_scalar() && se.is_rd_written { + return se; + } + } + } + pub fn find_v_se_to_issue(&mut self) -> SpikeEvent { if !self.commit_queue.is_empty() && self.commit_queue.front().unwrap().is_vfence() { // if the front (latest) se is a vfence, return the vfence From a4448d87e30c3e3fceb9a66d454a2bf981ac6fb8 Mon Sep 17 00:00:00 2001 From: qinjun-li Date: Wed, 7 Aug 2024 20:35:09 +0800 Subject: [PATCH 134/140] [rocketv] add rv_f in rocket. 
--- rocketv/configs/meowth.json | 2 +- t1rocketemu/configs/default.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocketv/configs/meowth.json b/rocketv/configs/meowth.json index ee5fb35c1..12ddcfa36 100644 --- a/rocketv/configs/meowth.json +++ b/rocketv/configs/meowth.json @@ -2,7 +2,7 @@ "parameter": { "useAsyncReset": false, "clockGate": true, - "instructionSets": ["rv32_i"], + "instructionSets": ["rv32_i", "rv_f"], "priv": "m", "hartIdLen": 4, "useBPWatch": false, diff --git a/t1rocketemu/configs/default.json b/t1rocketemu/configs/default.json index bdb6154f2..0cf9f92dc 100644 --- a/t1rocketemu/configs/default.json +++ b/t1rocketemu/configs/default.json @@ -1,6 +1,6 @@ { "parameter": { - "instructionSets": ["rv32_i", "rv_a", "rv_v", "Zve32x", "zvl1024b", "rv_c"], + "instructionSets": ["rv32_i", "rv_f", "rv_a", "rv_v", "Zve32x", "zvl1024b", "rv_c"], "cacheBlockBytes": 32, "nPMPs": 8, "cacheable": "b1???????????????????????????????", From f4e1e31df67f7a1c48a88bd6b115f3b0d517b46e Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 9 Aug 2024 14:47:27 +0800 Subject: [PATCH 135/140] fix for rebase --- t1/src/T1.scala | 4 ++-- t1rocket/src/T1RocketTile.scala | 37 +++++++++++++-------------------- t1rocketemu/src/TestBench.scala | 17 ++++++++------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/t1/src/T1.scala b/t1/src/T1.scala index 7423d557a..676ab423a 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -307,8 +307,8 @@ class T1Probe(parameter: T1Parameter) extends Bundle { // probes val lsuProbe: LSUProbe = new LSUProbe(parameter.lsuParameters) val laneProbes: Vec[LaneProbe] = Vec(parameter.laneNumber, new LaneProbe(parameter.laneParam)) - val issue: ValidIO[UInt] = Valid(UInt(param.instructionIndexBits.W)) - val retire: ValidIO[UInt] = Valid(UInt(param.xLen.W)) + val issue: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W)) + val retire: ValidIO[UInt] = Valid(UInt(parameter.xLen.W)) } class 
T1Interface(parameter: T1Parameter) extends Record { diff --git a/t1rocket/src/T1RocketTile.scala b/t1rocket/src/T1RocketTile.scala index 321f7b8cd..f413ed593 100644 --- a/t1rocket/src/T1RocketTile.scala +++ b/t1rocket/src/T1RocketTile.scala @@ -365,7 +365,8 @@ case class T1RocketTileParameter( ), Seq(0, 1, 2, 3))), floatModuleParameters = - Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))) + Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3))), + zvbbModuleParameters = Seq() ) else VFUInstantiateParameter( slotCount = 4, @@ -395,7 +396,8 @@ case class T1RocketTileParameter( OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1) ), Seq(0, 1, 2, 3))), - floatModuleParameters = Seq() + floatModuleParameters = Seq(), + zvbbModuleParameters = Seq() ) def t1Parameter: T1Parameter = T1Parameter( @@ -422,6 +424,11 @@ case class T1RocketTileParameter( def t1HightOutstandingParameter: AXI4BundleParameter = t1Parameter.axi4BundleParameter.copy(dataWidth = 32) } +class T1RocketProbe(parameter: T1RocketTileParameter) extends Bundle { + val rocketProbe: RocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) + val t1Probe: T1Probe = Output(Probe(new T1Probe(parameter.t1Parameter))) +} + class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { val clock = Input(Clock()) val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool()) @@ -456,21 +463,8 @@ class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { val highBandwidthAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter) val highOutstandingAXI: AXI4RWIrrevocable = org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HightOutstandingParameter) - val rocketProbe: RocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) - val t1Probe: T1Probe = 
Output(Probe(new T1Probe(parameter.t1Parameter))) - val lsuProbe: LSUProbe = Output(Probe(new LSUProbe(parameter.t1Parameter.lsuParameters))) - val laneProbes: Vec[LaneProbe] = Vec(parameter.t1Parameter.laneNumber, Output(Probe(new LaneProbe( - parameter.t1Parameter.laneParam.chainingSize, - parameter.t1Parameter.laneParam.instructionIndexBits - ))) - ) - val laneVrfProbes: Vec[VRFProbe] = Vec(parameter.t1Parameter.laneNumber, Output(Probe(new VRFProbe( - parameter.t1Parameter.laneParam.vrfParam.regNumBits, - parameter.t1Parameter.laneParam.vrfParam.vrfOffsetBits, - parameter.t1Parameter.laneParam.vrfParam.instructionIndexBits, - parameter.t1Parameter.laneParam.vrfParam.datapathWidth - ))) - ) + // TODO: merge it. + val t1RocketProbe: T1RocketProbe = Output(Probe(new T1RocketProbe(parameter))) } class T1RocketTile(val parameter: T1RocketTileParameter) @@ -561,9 +555,8 @@ class T1RocketTile(val parameter: T1RocketTileParameter) io.highOutstandingAXI <> t1.io.indexedLoadStorePort // probe - define(io.rocketProbe, rocket.io.rocketProbe) - define(io.t1Probe, t1.io.t1Probe) - define(io.lsuProbe, t1.io.lsuProbe) - io.laneProbes.zipWithIndex.foreach { case (io, index) => define(io, t1.io.laneProbes(index)) } - io.laneVrfProbes.zipWithIndex.foreach { case (io, index) => define(io, t1.io.laneVrfProbes(index)) } + val probeWire = Wire(new T1RocketProbe(parameter)) + define(io.t1RocketProbe, ProbeValue(probeWire)) + probeWire.rocketProbe := probe.read(rocket.io.rocketProbe) + probeWire.t1Probe := probe.read(t1.io.t1Probe) } diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index aa431fedc..58aae8534 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -157,23 +157,24 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil loadStoreAgent.io.gateWrite := false.B // probes - val rocketProbe = probe.read(dut.io.rocketProbe).suggestName(s"rocketProbe") - val t1Probe = 
probe.read(dut.io.t1Probe).suggestName(s"t1Probe") - val lsuProbe = probe.read(dut.io.lsuProbe).suggestName(s"lsuProbe") - val laneProbes = dut.io.laneProbes.zipWithIndex.map { + val t1RocketProbe = probe.read(dut.io.t1RocketProbe) + val rocketProbe = t1RocketProbe.rocketProbe.suggestName(s"rocketProbe") + val t1Probe = t1RocketProbe.t1Probe + val lsuProbe = t1Probe.lsuProbe + val laneProbes = t1Probe.laneProbes.zipWithIndex.map { case (p, idx) => val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") wire := probe.read(p) wire } - val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map { + val laneVrfProbes = t1Probe.laneProbes.map(_.vrfProbe).zipWithIndex.map { case (p, idx) => val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") wire := probe.read(p) wire } - val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") - val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") + val storeUnitProbe = t1Probe.lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") + val otherUnitProbe = t1Probe.lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") // output the probes // rocket reg write @@ -220,7 +221,7 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil ) // t1 lsu enq - when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) + when(t1Probe.lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${t1Probe.lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) // t1 vrf scoreboard val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * generator.parameter.t1Parameter.chainingSize) { _ => From 4ce33aea812adac1bc9b2be8d89acf290d5abefc Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 9 Aug 2024 15:12:09 +0800 Subject: [PATCH 136/140] fix for rebase --- t1rocket/src/T1RocketTile.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t1rocket/src/T1RocketTile.scala b/t1rocket/src/T1RocketTile.scala index 
f413ed593..9699eea05 100644 --- a/t1rocket/src/T1RocketTile.scala +++ b/t1rocket/src/T1RocketTile.scala @@ -425,8 +425,8 @@ case class T1RocketTileParameter( } class T1RocketProbe(parameter: T1RocketTileParameter) extends Bundle { - val rocketProbe: RocketProbe = Output(Probe(new RocketProbe(parameter.rocketParameter))) - val t1Probe: T1Probe = Output(Probe(new T1Probe(parameter.t1Parameter))) + val rocketProbe: RocketProbe = Output(new RocketProbe(parameter.rocketParameter)) + val t1Probe: T1Probe = Output(new T1Probe(parameter.t1Parameter)) } class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle { From e1492eec8b7ab5225cc6e215561f77044214f0e7 Mon Sep 17 00:00:00 2001 From: Jiuyang Liu Date: Fri, 9 Aug 2024 15:32:42 +0800 Subject: [PATCH 137/140] fix for rebase --- t1rocketemu/src/TestBench.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index 58aae8534..d49abc0c1 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -159,18 +159,16 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil // probes val t1RocketProbe = probe.read(dut.io.t1RocketProbe) val rocketProbe = t1RocketProbe.rocketProbe.suggestName(s"rocketProbe") - val t1Probe = t1RocketProbe.t1Probe - val lsuProbe = t1Probe.lsuProbe + val t1Probe = t1RocketProbe.t1Probe.suggestName(s"t1Probe") + val lsuProbe = t1Probe.lsuProbe.suggestName(s"t1LSUProbe") val laneProbes = t1Probe.laneProbes.zipWithIndex.map { case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") - wire := probe.read(p) + val wire = WireDefault(p).suggestName(s"lane${idx}Probe") wire } val laneVrfProbes = t1Probe.laneProbes.map(_.vrfProbe).zipWithIndex.map { case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") - wire := probe.read(p) + val wire = WireDefault(p).suggestName(s"lane${idx}VrfProbe") wire } val 
storeUnitProbe = t1Probe.lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") From 2e45a61b54186cabdcf9c63d5811107ed021166c Mon Sep 17 00:00:00 2001 From: Clo91eaf Date: Mon, 12 Aug 2024 14:17:25 +0800 Subject: [PATCH 138/140] [t1rocket] add quit to terminate simulation --- t1rocketemu/online_dpi/src/dpi.rs | 9 +++++++++ t1rocketemu/online_dpi/src/drive.rs | 10 +++++++++- t1rocketemu/online_dpi/src/lib.rs | 4 ++++ t1rocketemu/src/TestBench.scala | 5 +++++ tests/emurt/emurt.c | 4 +++- tests/t1_main.S | 4 +++- 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/t1rocketemu/online_dpi/src/dpi.rs b/t1rocketemu/online_dpi/src/dpi.rs index 82c33ebe3..5d2599942 100644 --- a/t1rocketemu/online_dpi/src/dpi.rs +++ b/t1rocketemu/online_dpi/src/dpi.rs @@ -313,6 +313,9 @@ mod dpi_export { #[cfg(feature = "trace")] /// `export "DPI-C" function dump_wave(input string file)` pub fn dump_wave(path: *const c_char); + + /// 'export "DPI-C" function quit()' + pub fn quit(); } } @@ -326,3 +329,9 @@ pub(crate) fn dump_wave(scope: crate::svdpi::SvScope, path: &str) { dpi_export::dump_wave(path_cstring.as_ptr()); } } + +pub(crate) fn quit() { + unsafe { + dpi_export::quit(); + } +} \ No newline at end of file diff --git a/t1rocketemu/online_dpi/src/drive.rs b/t1rocketemu/online_dpi/src/drive.rs index 2b89fdca4..6a4ff08f6 100644 --- a/t1rocketemu/online_dpi/src/drive.rs +++ b/t1rocketemu/online_dpi/src/drive.rs @@ -1,5 +1,5 @@ use crate::dpi::*; -use crate::get_t; +use crate::{ get_t, EXIT_CODE, EXIT_POS }; use crate::svdpi::SvScope; use crate::OfflineArgs; @@ -335,6 +335,14 @@ impl Driver { self.shadow_mem.write_mem_axi(addr, size, 32, strobe, data); let data_hex = hex::encode(data); self.last_commit_cycle = get_t(); + + // exit with code + if addr == EXIT_POS && data.len() == 4 && data == &EXIT_CODE.to_le_bytes() { + info!("exit successfully"); + quit(); + return; + } + trace!( "[{}] axi_write_load_store (addr={addr:#x}, size={size}, data={data_hex})", get_t() diff --git 
a/t1rocketemu/online_dpi/src/lib.rs b/t1rocketemu/online_dpi/src/lib.rs index bafe6db0d..35a72ec33 100644 --- a/t1rocketemu/online_dpi/src/lib.rs +++ b/t1rocketemu/online_dpi/src/lib.rs @@ -24,6 +24,10 @@ pub(crate) struct OfflineArgs { pub timeout: u64, } +// quit signal +const EXIT_POS: u32 = 0x4000_0000; +const EXIT_CODE: u32 = 0xdead_beef; + // keep in sync with TestBench.ClockGen pub const CYCLE_PERIOD: u64 = 20; diff --git a/t1rocketemu/src/TestBench.scala b/t1rocketemu/src/TestBench.scala index d49abc0c1..aa72472c5 100644 --- a/t1rocketemu/src/TestBench.scala +++ b/t1rocketemu/src/TestBench.scala @@ -34,6 +34,11 @@ class TestBench(generator: SerializableModuleGenerator[T1RocketTile, T1RocketTil |`endif | endfunction; | + | export "DPI-C" function quit; + | function quit(); + | $$finish; + | endfunction; + | | import "DPI-C" context function void t1rocket_cosim_init(); | initial begin | t1rocket_cosim_init(); diff --git a/tests/emurt/emurt.c b/tests/emurt/emurt.c index f52a57d36..77ebc6fad 100644 --- a/tests/emurt/emurt.c +++ b/tests/emurt/emurt.c @@ -54,7 +54,9 @@ int _write(int file, char* ptr, int len) { } void _exit(int code) { - __asm__("csrwi 0x7cc, 0"); + __asm__("li x1, 0x40000000"); + __asm__("li x2, 0xdeadbeef"); + __asm__("sw x2, 0(x1)"); __builtin_unreachable(); } diff --git a/tests/t1_main.S b/tests/t1_main.S index 85ed6ac32..426e90a1b 100644 --- a/tests/t1_main.S +++ b/tests/t1_main.S @@ -10,6 +10,8 @@ _start: call test // exit - csrwi 0x7cc, 0 + li x1, 0x40000000 + li x2, 0xdeadbeef + sw x2, 0(x1) .p2align 2 From c7d507a91b9d7850fa3b51e1fd72721cdf41cff6 Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 12 Aug 2024 20:03:16 +0800 Subject: [PATCH 139/140] [nix] refactor features set filter to be more structural Signed-off-by: Avimitin --- tests/asm/fpsmoke/features-required.json | 2 +- tests/codegen/default.nix | 25 +++----- tests/default.nix | 60 +++++++++++++++---- .../features-required.json | 2 +- .../intrinsic/softmax/features-required.json 
| 2 +- tests/perf/llama/default.nix | 4 +- tests/rvv_bench/default.nix | 2 +- .../mandelbrot/features-required.json | 2 +- 8 files changed, 66 insertions(+), 33 deletions(-) diff --git a/tests/asm/fpsmoke/features-required.json b/tests/asm/fpsmoke/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/asm/fpsmoke/features-required.json +++ b/tests/asm/fpsmoke/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/codegen/default.nix b/tests/codegen/default.nix index e4883ade6..9f438a730 100644 --- a/tests/codegen/default.nix +++ b/tests/codegen/default.nix @@ -4,21 +4,12 @@ , makeBuilder # Instead of testing feature is supported on TOP level, # codegen case are always generated with supported code. -, currentFeatures +, featuresSet }: let builder = makeBuilder { casePrefix = "codegen"; }; makeCaseName = lib.replaceStrings [ "." ] [ "_" ]; - extraValueFromFeatures = pattern: - lib.last - (lib.splitString ":" - (lib.head - (lib.filter - (lib.hasPrefix pattern) - currentFeatures))); - vlen = extraValueFromFeatures "vlen"; - xlen = extraValueFromFeatures "xlen"; build = { rawCaseName, extra }: builder @@ -36,8 +27,8 @@ let runHook preBuild ${rvv-codegen}/bin/single \ - -VLEN "${vlen}" \ - -XLEN "${xlen}" \ + -VLEN "${featuresSet.vlen}" \ + -XLEN "${featuresSet.xlen}" \ -repeat 16 \ -testfloat3level 2 \ -configfile ${rvv-codegen}/configs/${rawCaseName}.toml \ @@ -71,13 +62,13 @@ let ) rawCaseNames)); - commonTests = buildTestsFromFile ./common.txt { featuresRequired = [ ]; }; - fpTests = buildTestsFromFile ./fp.txt { featuresRequired = [ "zve32f" ]; }; - zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = [ "zvbb" ]; }; + commonTests = buildTestsFromFile ./common.txt { featuresRequired = { extensions = [ ]; }; }; + fpTests = buildTestsFromFile ./fp.txt { featuresRequired = { extensions = [ "zve32f" ]; }; }; + zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = { extensions = [ "zvbb" ]; }; }; 
in lib.recurseIntoAttrs ( commonTests // - lib.optionalAttrs (lib.elem "zve32f" currentFeatures) fpTests // - lib.optionalAttrs (lib.elem "zvbb" currentFeatures) zvbbTests + lib.optionalAttrs (lib.elem "zve32f" featuresSet.extensions) fpTests // + lib.optionalAttrs (lib.elem "zvbb" featuresSet.extensions) zvbbTests ) diff --git a/tests/default.nix b/tests/default.nix index 36e5c64c9..535922886 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -11,22 +11,62 @@ }: let - # Add an extra abstract layer between test case and RTL design, so that we can have clean and organized way - # for developer to specify their required features without the need to parse ISA string themselves. - currentFeatures = [ - "vlen:${rtlDesignMetadata.vlen}" - "dlen:${rtlDesignMetadata.dlen}" - "xlen:${if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then "32" else "64"}" - ] - ++ (lib.splitString "_" rtlDesignMetadata.march); + getVLen = ext: + let + val = builtins.tryEval + (lib.toInt + (lib.toLower + (lib.removeSuffix "b" + (lib.removePrefix "zvl" + (lib.toLower ext))))); + in + if val.success then + val.value + else + throw "Invalid vlen extension `${ext}` specify, expect Zvl{N}b"; + + featuresSet = { + extensions = lib.splitString "_" rtlDesignMetadata.march; + xlen = if (lib.hasPrefix "rv32" rtlDesignMetadata.march) then 32 else 64; + vlen = getVLen (lib.last + (lib.filter + (x: lib.hasPrefix "zvl" + (lib.toLower x)))); + inherit (rtlDesignMetadata) dlen; + }; # isSubSetOf m n: n is subset of m isSubsetOf = m: n: lib.all (x: lib.elem x m) n; + # Return true if attribute in first argument exists in second argument, and the value is also equal. 
+ # + # Example: + # + # hasIntersect { } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [1]; } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [1]; b = 4; } { a = [1 2 3]; b = 4; } # true + # hasIntersect { a = [4]; } { a = [1 2 3]; b = 4; } # false + # hasIntersect { c = 4; } { a = [1 2 3]; b = 4; } # false + # + # hasIntersect :: AttrSet -> AttrSet -> Bool + hasIntersect = ma: na: with builtins; let + keysMa = attrNames ma; + keysNa = attrNames na; + intersectKeys = lib.filter (n: lib.elem n keysNa) (attrNames ma); + intersectValEquality = map + (key: + if typeOf (ma.${key}) == "list" then + isSubsetOf na.${key} ma.${key} + else ma.${key} == na.${key}) + intersectKeys; + in + (length keysMa == 0) || + ((length intersectKeys > 0) && all (isEqual: isEqual) intersectValEquality); + scope = lib.recurseIntoAttrs (lib.makeScope newScope (casesSelf: { recurseForDerivations = true; - inherit verilator-emu verilator-emu-trace vcs-emu vcs-emu-trace rtlDesignMetadata currentFeatures; + inherit verilator-emu verilator-emu-trace vcs-emu vcs-emu-trace rtlDesignMetadata featuresSet; makeEmuResult = casesSelf.callPackage ./make-emu-result.nix { }; @@ -48,7 +88,7 @@ let filterByFeatures = caseName: caseDrv: assert lib.assertMsg (caseDrv ? 
featuresRequired) "${caseName} doesn't have features specified"; # Test the case required extensions is supported by rtl design - isSubsetOf currentFeatures caseDrv.featuresRequired; + hasIntersect caseDrv.featuresRequired featuresSet; findAndBuild = dir: build: lib.recurseIntoAttrs (lib.pipe (builtins.readDir dir) [ diff --git a/tests/intrinsic/linear_normalization/features-required.json b/tests/intrinsic/linear_normalization/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/intrinsic/linear_normalization/features-required.json +++ b/tests/intrinsic/linear_normalization/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/intrinsic/softmax/features-required.json b/tests/intrinsic/softmax/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/intrinsic/softmax/features-required.json +++ b/tests/intrinsic/softmax/features-required.json @@ -1 +1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } diff --git a/tests/perf/llama/default.nix b/tests/perf/llama/default.nix index 74d111dd7..7f8743d09 100644 --- a/tests/perf/llama/default.nix +++ b/tests/perf/llama/default.nix @@ -21,7 +21,9 @@ let in build { - featuresRequired = [ "zve32f" ]; + featuresRequired = { + extensions = [ "zve32f" ]; + }; caseName = "llama"; diff --git a/tests/rvv_bench/default.nix b/tests/rvv_bench/default.nix index 5d8f88017..516fa50ff 100644 --- a/tests/rvv_bench/default.nix +++ b/tests/rvv_bench/default.nix @@ -43,7 +43,7 @@ let patches = [ ./t1_runtime.patch ]; - featuresRequired = lib.optionals (lib.elem caseName fpCases) [ "zve32f" ]; + featuresRequired = lib.optionals (lib.elem caseName fpCases) { extensions = [ "zve32f" ]; }; buildPhase = '' runHook preBuild diff --git a/tests/rvv_bench/mandelbrot/features-required.json b/tests/rvv_bench/mandelbrot/features-required.json index 892f81d20..08c7567d8 100644 --- a/tests/rvv_bench/mandelbrot/features-required.json +++ b/tests/rvv_bench/mandelbrot/features-required.json @@ -1 
+1 @@ -["zve32f"] +{ "extensions": ["zve32f"] } From a86154f4da8618df1531d4f72c17857a5f659f1c Mon Sep 17 00:00:00 2001 From: Avimitin Date: Mon, 12 Aug 2024 21:37:10 +0800 Subject: [PATCH 140/140] [nix] refactor test case to allow t1rocket attr opt-in Signed-off-by: Avimitin --- t1rocketemu/default.nix | 67 ++--------- t1rocketemu/emu.nix | 61 ++++++++++ tests/asm/default.nix | 2 +- tests/builder.nix | 87 ++++++++------- tests/codegen/default.nix | 6 +- tests/default.nix | 41 ++++--- tests/intrinsic/default.nix | 2 +- tests/make-emu-result.nix | 209 ++++++++++++++++++----------------- tests/mlir/default.nix | 2 +- tests/perf/llama/default.nix | 2 +- tests/pytorch/default.nix | 2 +- tests/rvv_bench/default.nix | 2 +- 12 files changed, 257 insertions(+), 226 deletions(-) create mode 100644 t1rocketemu/emu.nix diff --git a/t1rocketemu/default.nix b/t1rocketemu/default.nix index 6ca9fad04..bd63fc4cb 100644 --- a/t1rocketemu/default.nix +++ b/t1rocketemu/default.nix @@ -1,67 +1,18 @@ { lib , newScope -, rustPlatform -, zlib -, libspike -, libspike_interfaces -, cmake -, verilator }: -lib.makeScope newScope (scope: rec { +lib.makeScope newScope (scope: { mlirbc = scope.callPackage ./nix/mlirbc.nix { }; rtl = scope.callPackage ./nix/rtl.nix { }; verilated-c-lib = scope.callPackage ./nix/verilated-c-lib.nix { }; - - emu = rustPlatform.buildRustPackage { - name = "t1rocketemu"; - - src = with lib.fileset; toSource { - root = ./.; - fileset = unions [ - ./test_common - ./spike_rs - ./offline - ./online_dpi - ./online_drive - ./online_vcs - ./Cargo.lock - ./Cargo.toml - ]; + emu = scope.callPackage ./emu.nix { }; + designConfig = with builtins; (fromJSON (readFile ./configs/default.json)).parameter; + cases = scope.callPackage ../tests { + configName = "t1rocket"; + t1rocket-emu = scope.emu; + rtlDesignMetadata = { + march = "rv32iafcv_zve32x_zvl1024b"; + dlen = scope.designConfig.dLen; }; - - buildInputs = [ - zlib - libspike_interfaces - verilated-c-lib - ]; - - 
nativeBuildInputs = [ - verilator - cmake - ]; - - # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? - env = - let - toLib = drv: "${drv}/lib"; - in - { - SPIKE_LIB_DIR = toLib libspike; - SPIKE_INTERFACES_LIB_DIR = toLib libspike_interfaces; - VERILATED_INC_DIR = "${verilated-c-lib}/include"; - VERILATED_LIB_DIR = "${verilated-c-lib}/lib"; - }; - - cargoLock = { - lockFile = ./Cargo.lock; - }; - - outputs = [ "out" "driver" "offline" ]; - - postInstall = '' - mkdir -p $driver/bin $offline/bin - ln -s $out/bin/driver $driver/bin/driver - ln -s $out/bin/offline $driver/bin/offline - ''; }; }) diff --git a/t1rocketemu/emu.nix b/t1rocketemu/emu.nix new file mode 100644 index 000000000..50f14e2c9 --- /dev/null +++ b/t1rocketemu/emu.nix @@ -0,0 +1,61 @@ +{ lib +, rustPlatform +, zlib +, libspike +, libspike_interfaces +, cmake +, verilator +, verilated-c-lib +}: +rustPlatform.buildRustPackage { + name = "t1rocketemu"; + + src = with lib.fileset; toSource { + root = ./.; + fileset = unions [ + ./test_common + ./spike_rs + ./offline + ./online_dpi + ./online_drive + ./online_vcs + ./Cargo.lock + ./Cargo.toml + ]; + }; + + buildInputs = [ + zlib + libspike_interfaces + verilated-c-lib + ]; + + nativeBuildInputs = [ + verilator + cmake + ]; + + # FIXME: can we hack this into derivations, so that we don't need to specify library dir explicitly? 
+ env = + let + toLib = drv: "${drv}/lib"; + in + { + SPIKE_LIB_DIR = toLib libspike; + SPIKE_INTERFACES_LIB_DIR = toLib libspike_interfaces; + VERILATED_INC_DIR = "${verilated-c-lib}/include"; + VERILATED_LIB_DIR = "${verilated-c-lib}/lib"; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + }; + + outputs = [ "out" "driver" "offline" ]; + + postInstall = '' + mkdir -p $driver/bin $offline/bin + ln -s $out/bin/driver $driver/bin/driver + ln -s $out/bin/offline $driver/bin/offline + ''; +} diff --git a/tests/asm/default.nix b/tests/asm/default.nix index debd78f5c..1fb5c6c19 100644 --- a/tests/asm/default.nix +++ b/tests/asm/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; isFp = lib.pathExists (lib.path.append sourcePath "isFp"); buildPhase = '' diff --git a/tests/builder.nix b/tests/builder.nix index 4730af191..e029699af 100644 --- a/tests/builder.nix +++ b/tests/builder.nix @@ -19,56 +19,57 @@ let # avoid adding jq to buildInputs, since it will make overriding buildInputs more error prone jqBin = "${jq}/bin/jq"; - caseDrv = stdenv.mkDerivation (self: rec { - # don't set name directory, since it will be suffixed with target triple - pname = "${casePrefix}.${caseName}"; - name = pname; + caseDrv = stdenv.mkDerivation (self: lib.recursiveUpdate + rec { + # don't set name directory, since it will be suffixed with target triple + pname = "${casePrefix}.${caseName}"; + name = pname; - CC = "${stdenv.targetPlatform.config}-cc"; + CC = "${stdenv.targetPlatform.config}-cc"; - NIX_CFLAGS_COMPILE = - let - march = lib.pipe rtlDesignMetadata.march [ - (lib.splitString "_") - (map (ext: if ext == "zvbb" then "zvbb1" else ext)) - (lib.concatStringsSep "_") - ]; - in - [ - "-mabi=ilp32f" - "-march=${march}" - "-mno-relax" - "-static" - "-mcmodel=medany" - "-fvisibility=hidden" - "-fno-PIC" - "-g" - "-O3" - ] ++ lib.optionals (lib.elem "zvbb" 
(lib.splitString "_" rtlDesignMetadata.march)) [ "-menable-experimental-extensions" ]; + NIX_CFLAGS_COMPILE = + let + march = lib.pipe rtlDesignMetadata.march [ + (lib.splitString "_") + (map (ext: if ext == "zvbb" then "zvbb1" else ext)) + (lib.concatStringsSep "_") + ]; + in + [ + "-mabi=ilp32f" + "-march=${march}" + "-mno-relax" + "-static" + "-mcmodel=medany" + "-fvisibility=hidden" + "-fno-PIC" + "-g" + "-O3" + ] ++ lib.optionals (lib.elem "zvbb" (lib.splitString "_" rtlDesignMetadata.march)) [ "-menable-experimental-extensions" ]; - installPhase = '' - runHook preInstall + installPhase = '' + runHook preInstall - mkdir -p $out/bin - cp ${pname}.elf $out/bin + mkdir -p $out/bin + cp ${pname}.elf $out/bin - ${jqBin} --null-input \ - --arg name ${pname} \ - --arg type ${casePrefix} \ - --arg elfPath "$out/bin/${pname}.elf" \ - '{ "name": $name, "elf": { "path": $elfPath } }' \ - > $out/${pname}.json + ${jqBin} --null-input \ + --arg name ${pname} \ + --arg type ${casePrefix} \ + --arg elfPath "$out/bin/${pname}.elf" \ + '{ "name": $name, "elf": { "path": $elfPath } }' \ + > $out/${pname}.json - runHook postInstall - ''; + runHook postInstall + ''; - dontFixup = true; + dontFixup = true; - passthru = { - inherit rtlDesignMetadata; - emu-result = makeEmuResult caseDrv; - }; - - } // overrides); + passthru = { + inherit rtlDesignMetadata; + emu-result = makeEmuResult caseDrv; + }; + } + overrides); # end of recursiveUpdate in caseDrv diff --git a/tests/codegen/default.nix b/tests/codegen/default.nix index 9f438a730..481a54e30 100644 --- a/tests/codegen/default.nix +++ b/tests/codegen/default.nix @@ -62,9 +62,9 @@ let ) rawCaseNames)); - commonTests = buildTestsFromFile ./common.txt { featuresRequired = { extensions = [ ]; }; }; - fpTests = buildTestsFromFile ./fp.txt { featuresRequired = { extensions = [ "zve32f" ]; }; }; - zvbbTests = buildTestsFromFile ./zvbb.txt { featuresRequired = { extensions = [ "zvbb" ]; }; }; + commonTests = buildTestsFromFile ./common.txt 
{ passthru.featuresRequired = { extensions = [ ]; }; }; + fpTests = buildTestsFromFile ./fp.txt { passthru.featuresRequired = { extensions = [ "zve32f" ]; }; }; + zvbbTests = buildTestsFromFile ./zvbb.txt { passthru.featuresRequired = { extensions = [ "zvbb" ]; }; }; in lib.recurseIntoAttrs ( commonTests // diff --git a/tests/default.nix b/tests/default.nix index 535922886..a1711c540 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -1,13 +1,19 @@ { lib -, configName -, rtlDesignMetadata , newScope , rv32-stdenv , runCommand -, verilator-emu -, verilator-emu-trace -, vcs-emu -, vcs-emu-trace + +, configName +, rtlDesignMetadata + +, t1rocket-emu ? null +, t1rocket-emu-trace ? null + +, verilator-emu ? null +, verilator-emu-trace ? null + +, vcs-emu ? null +, vcs-emu-trace ? null }: let @@ -15,10 +21,9 @@ let let val = builtins.tryEval (lib.toInt - (lib.toLower - (lib.removeSuffix "b" - (lib.removePrefix "zvl" - (lib.toLower ext))))); + (lib.removeSuffix "b" + (lib.removePrefix "zvl" + (lib.toLower ext)))); in if val.success then val.value @@ -66,7 +71,15 @@ let scope = lib.recurseIntoAttrs (lib.makeScope newScope (casesSelf: { recurseForDerivations = true; - inherit verilator-emu verilator-emu-trace vcs-emu vcs-emu-trace rtlDesignMetadata featuresSet; + inherit + verilator-emu + verilator-emu-trace + vcs-emu + vcs-emu-trace + t1rocket-emu + t1rocket-emu-trace + rtlDesignMetadata + featuresSet; makeEmuResult = casesSelf.callPackage ./make-emu-result.nix { }; @@ -83,7 +96,7 @@ let in if lib.pathExists extraFeatures then builtins.fromJSON (lib.fileContents extraFeatures) - else [ ]; + else { }; filterByFeatures = caseName: caseDrv: assert lib.assertMsg (caseDrv ? 
featuresRequired) "${caseName} doesn't have features specified"; @@ -175,7 +188,7 @@ let in runCommand "catch-${configName}-all-vcs-emu-result-for-ci" { } script; - all = + _all = let allCases = lib.filter lib.isDerivation @@ -195,4 +208,4 @@ let { } script; in -lib.recurseIntoAttrs (scopeStripped // { inherit all _allEmuResult _allVCSEmuResult; }) +lib.recurseIntoAttrs (scopeStripped // { inherit _all _allEmuResult _allVCSEmuResult; }) diff --git a/tests/intrinsic/default.nix b/tests/intrinsic/default.nix index 3dadca131..146d8efd0 100644 --- a/tests/intrinsic/default.nix +++ b/tests/intrinsic/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; buildPhase = '' runHook preBuild diff --git a/tests/make-emu-result.nix b/tests/make-emu-result.nix index 1df18c8db..9b15191b3 100644 --- a/tests/make-emu-result.nix +++ b/tests/make-emu-result.nix @@ -3,18 +3,22 @@ , stdenvNoCC , jq , zstd -, verilator-emu -, verilator-emu-trace -, vcs-emu -, vcs-emu-trace -, elaborateConfigJson + +, t1rocket-emu ? null +, t1rocket-emu-trace ? null + +, verilator-emu ? null +, verilator-emu-trace ? null + +, vcs-emu ? null +, vcs-emu-trace ? null }: # makeEmuResult arg testCase: -let - self = stdenvNoCC.mkDerivation { +rec { + verilator-check = stdenvNoCC.mkDerivation { name = "${testCase.pname}-emu-result"; nativeBuildInputs = [ zstd jq ]; @@ -39,7 +43,13 @@ let echo "[nix] Running test case ${testCase.pname} with args $emuDriverArgs" - RUST_BACKTRACE=full "$emuDriver" $emuDriverArgs 2> "$rtlEventOutPath" + export RUST_BACKTRACE=full + if ! 
"$emuDriver" $emuDriverArgs 2> "$rtlEventOutPath"; then + echo "[nix] online driver run failed" + cat $rtlEventOutPath + echo "[nix] Rerun with command: '$emuDriver $emuDriverArgs'" + exit 1 + fi echo "[nix] online driver done" @@ -63,6 +73,17 @@ let exit 1 fi + if [ -z "$postCheck" ]; then + set +e + mkdir -p "$out" + "${verilator-emu}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" > $out/offline-check-status + set -e + fi + runHook postCheck ''; @@ -79,114 +100,98 @@ let runHook postInstall ''; + }; - passthru.with-trace = self.overrideAttrs (old: { - name = old.name + "-with-trace"; - emuDriver = "${verilator-emu-trace}/bin/online_drive"; - emuDriverArgs = old.emuDriverArgs or [ ] ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; - postCheck = '' - if [ ! -r "$out/wave.fst" ]; then - echo -e "[nix] \033[0;31mInternal Error\033[0m: waveform not found in output" - exit 1 - fi - ''; - }); - - passthru.with-offline = self.overrideAttrs (old: { - name = old.name + "-with-offline"; - preInstall = '' - set +e - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status - set -e - ''; - }); + verilator-check-trace = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-trace"; + emuDriver = "${verilator-emu-trace}/bin/online_drive"; + emuDriverArgs = old.emuDriverArgs or [ ] ++ [ "--wave-path" "${placeholder "out"}/wave.fst" ]; + postCheck = '' + if [ ! 
-r "$out/wave.fst" ]; then + echo -e "[nix] \033[0;31mInternal Error\033[0m: waveform not found in output" + exit 1 + fi + ''; + }); - passthru.with-vcs = self.overrideAttrs (old: { - name = old.name + "-with-vcs"; - __noChroot = true; - dontPatchELF = true; + vcs-check = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-vcs"; + __noChroot = true; + dontPatchELF = true; - buildPhase = '' - runHook preBuild + buildPhase = '' + runHook preBuild - mkdir -p "$out" - echo "[nix] Running VCS for ${testCase.pname}" + mkdir -p "$out" + echo "[nix] Running VCS for ${testCase.pname}" - RUST_BACKTRACE=full "${vcs-emu}/bin/t1-vcs-simulator" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - 1> /dev/null \ - 2> $rtlEventOutPath + RUST_BACKTRACE=full "${vcs-emu}/bin/t1-vcs-simulator" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + 1> /dev/null \ + 2> $rtlEventOutPath - echo "[nix] VCS emu done" + echo "[nix] VCS emu done" - runHook postBuild - ''; + runHook postBuild + ''; - postCheck = '' - set +e + postCheck = '' + set +e - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status + "${vcs-emu}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" > $out/offline-check-status - set -e - ''; - }); - - # TODO: We should write some framework like NixOS module to overlay these attribute, instead - # of override attribute one by one. 
- passthru.with-vcs-trace = self.overrideAttrs (old: { - name = old.name + "-with-vcs-trace"; - __noChroot = true; - dontPatchELF = true; - buildPhase = '' - runHook preBuild + set -e + ''; + }); - mkdir -p "$out" - echo "[nix] Running VCS(TRACE) for ${testCase.pname}" + vcs-trace-check = lib.overrideDerivation verilator-check (old: { + name = old.name + "-with-vcs-trace"; + __noChroot = true; + dontPatchELF = true; + buildPhase = '' + runHook preBuild - RUST_BACKTRACE=full "${vcs-emu-trace}/bin/t1-vcs-simulator" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --wave-path ${testCase.pname}.fsdb \ - 1> /dev/null \ - 2> $rtlEventOutPath + mkdir -p "$out" + echo "[nix] Running VCS(TRACE) for ${testCase.pname}" - echo "[nix] VCS emu done" + RUST_BACKTRACE=full "${vcs-emu-trace}/bin/t1-vcs-simulator" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --wave-path ${testCase.pname}.fsdb \ + 1> /dev/null \ + 2> $rtlEventOutPath - runHook postBuild - ''; + echo "[nix] VCS emu done" - postCheck = '' - set +e + runHook postBuild + ''; - echo "[nix] Checking VCS event log" - "${verilator-emu}/bin/offline" \ - --elf-file ${testCase}/bin/${testCase.pname}.elf \ - --log-file $rtlEventOutPath \ - --log-level ERROR &> $out/offline-check-journal - printf "$?" > $out/offline-check-status - if [ "$(cat $out/offline-check-status)" == "0" ]; then - echo "[nix] VCS difftest PASS" - else - echo "[nix] VCS difftest FAIL" - fi + postCheck = '' + set +e + + echo "[nix] Checking VCS event log" + "${vcs-emu-trace}/bin/offline" \ + --elf-file ${testCase}/bin/${testCase.pname}.elf \ + --log-file $rtlEventOutPath \ + --log-level ERROR &> $out/offline-check-journal + printf "$?" 
> $out/offline-check-status + if [ "$(cat $out/offline-check-status)" == "0" ]; then + echo "[nix] VCS difftest PASS" + else + echo "[nix] VCS difftest FAIL" + fi - set -e - ''; - - postInstall = '' - # VCS have weird behavior on file creation, it will report read-only filesystem on our output, - # while other tools can mutate file system correctly. - cp ${testCase.pname}.fsdb "$out" - cp -r ${vcs-emu-trace}/lib/t1-vcs-simulator.daidir "$out" - ''; - }); - }; -in -self + set -e + ''; + + postInstall = '' + # VCS have weird behavior on file creation, it will report read-only filesystem on our output, + # while other tools can mutate file system correctly. + cp ${testCase.pname}.fsdb "$out" + cp -r ${vcs-emu-trace}/lib/t1-vcs-simulator.daidir "$out" + ''; + }); +} diff --git a/tests/mlir/default.nix b/tests/mlir/default.nix index 96ba1218f..486506932 100644 --- a/tests/mlir/default.nix +++ b/tests/mlir/default.nix @@ -14,7 +14,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; nativeBuildInputs = [ buddy-mlir ]; diff --git a/tests/perf/llama/default.nix b/tests/perf/llama/default.nix index 7f8743d09..c72efb72d 100644 --- a/tests/perf/llama/default.nix +++ b/tests/perf/llama/default.nix @@ -21,7 +21,7 @@ let in build { - featuresRequired = { + passthru.featuresRequired = { extensions = [ "zve32f" ]; }; diff --git a/tests/pytorch/default.nix b/tests/pytorch/default.nix index 2da4a2609..719ac58cd 100644 --- a/tests/pytorch/default.nix +++ b/tests/pytorch/default.nix @@ -27,7 +27,7 @@ let src = sourcePath; - featuresRequired = getTestRequiredFeatures sourcePath; + passthru.featuresRequired = getTestRequiredFeatures sourcePath; nativeBuildInputs = [ buddy-mlir-pyenv buddy-mlir ]; diff --git a/tests/rvv_bench/default.nix b/tests/rvv_bench/default.nix index 516fa50ff..2e238ee33 100644 --- a/tests/rvv_bench/default.nix +++ b/tests/rvv_bench/default.nix @@ -43,7 +43,7 @@ let 
patches = [ ./t1_runtime.patch ]; - featuresRequired = lib.optionals (lib.elem caseName fpCases) { extensions = [ "zve32f" ]; }; + passthru.featuresRequired = lib.optionals (lib.elem caseName fpCases) { extensions = [ "zve32f" ]; }; buildPhase = '' runHook preBuild