diff --git a/elaborator/src/rocketv/Rocket.scala b/elaborator/src/rocketv/Rocket.scala
index 99f90d17ad..f5954500e0 100644
--- a/elaborator/src/rocketv/Rocket.scala
+++ b/elaborator/src/rocketv/Rocket.scala
@@ -34,7 +34,8 @@ object Rocket extends Elaborator {
     @arg(name = "fastLoadByte") fastLoadByte:                     Boolean,
     @arg(name = "fastLoadWord") fastLoadWord:                     Boolean,
     @arg(name = "dcacheNSets") dcacheNSets:                       Int,
-    @arg(name = "flushOnFenceI") flushOnFenceI:                   Boolean) {
+    @arg(name = "flushOnFenceI") flushOnFenceI:                   Boolean,
+    @arg(name = "usingT1") usingT1:                               Boolean) {
     def convert: RocketParameter = RocketParameter(
       useAsyncReset,
       clockGate,
@@ -61,7 +62,8 @@ object Rocket extends Elaborator {
       fastLoadByte,
       fastLoadWord,
       dcacheNSets,
-      flushOnFenceI
+      flushOnFenceI,
+      usingT1
     )
   }
 
diff --git a/elaborator/src/t1rocketv/T1RocketTile.scala b/elaborator/src/t1rocketv/T1RocketTile.scala
new file mode 100644
index 0000000000..3cb8398e25
--- /dev/null
+++ b/elaborator/src/t1rocketv/T1RocketTile.scala
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu <liu@jiuyang.me>
+package org.chipsalliance.t1.elaborator.t1rocketv
+
+import chisel3.util.BitPat
+import chisel3.util.experimental.BitSet
+import mainargs._
+import org.chipsalliance.t1.elaborator.Elaborator
+import org.chipsalliance.t1.rtl.vrf.RamType
+import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw}
+import org.chipsalliance.t1.tile.{T1RocketTile, T1RocketTileParameter}
+
+// --instructionSets rv32_i --instructionSets rv_a --instructionSets rv_c --instructionSets rv_v --instructionSets Zve32x --instructionSets zvl1024b --cacheBlockBytes 32 --nPMPs 8 --cacheable 80000000-ffffffff --sideEffects 00000000-1fffffff --dcacheNSets 64 --dcacheNWays 4 --dcacheRowBits 32 --iCacheNSets 32 --iCacheNWays 4 --iCachePrefetch false --dLen 256 --vrfBankSize 2 --vrfRamType p0rp1w
+object T1RocketTile extends Elaborator {
+  implicit object BitSetRead extends TokensReader.Simple[BitSet] { // mainargs reader: one comma-separated token list -> a single BitSet
+    def shortName = "bitset"
+    def read(strs: Seq[String]) = {
+      Right(
+        strs.head
+          .split(",")
+          .map { opt =>
+            if (opt.contains("-")) { // "from-to": hexadecimal bounds, upper bound inclusive
+              val range = opt.split("-")
+              require(range.size == 2)
+              val from = BigInt(range.head, 16)
+              val to = BigInt(range.last, 16) + 1 // +1 so that `to - from` counts the upper bound itself
+              BitSet.fromRange(from, to - from, range.head.length * 4) // width: 4 bits per hex digit of the lower bound
+            } else if (opt.contains("+")) { // "from+length": hexadecimal base and length
+              val range = opt.split("\\+")
+              require(range.size == 2)
+              val from = BigInt(range.head, 16)
+              val length = BigInt(range.last, 16)
+              BitSet.fromRange(from, length, range.head.length * 4) // width: 4 bits per hex digit of the base
+            } else { // anything else is passed to BitPat as a binary pattern string
+              BitPat(s"b$opt")
+            }
+          }
+          .reduce(_.union(_)) // union of all comma-separated entries
+      )
+    }
+  }
+
+  // mainargs reader for RamType; an unknown name is reported as a parse error (Left) instead of a MatchError.
+  implicit object RamTypeRead extends TokensReader.Simple[RamType] {
+    def shortName = "ramtype"
+    def read(strs: Seq[String]): Either[String, RamType] =
+      strs.head match {
+        case "p0rw"     => Right(p0rw)
+        case "p0rp1w"   => Right(p0rp1w)
+        case "p0rwp1rw" => Right(p0rwp1rw)
+        case unknown =>
+          Left(s"unknown ram type '$unknown', expected one of: p0rw, p0rp1w, p0rwp1rw")
+      }
+  }
+
+  @main
+  case class T1RocketTileParameterMain(
+                                        @arg(name = "instructionSets") instructionSets: Seq[String],
+                                        @arg(name = "cacheBlockBytes") cacheBlockBytes: Int,
+                                        @arg(name = "nPMPs") nPMPs: Int,
+                                        @arg(name = "cacheable") cacheable: BitSet,
+                                        @arg(name = "sideEffects") sideEffects: BitSet,
+                                        @arg(name = "dcacheNSets") dcacheNSets: Int,
+                                        @arg(name = "dcacheNWays") dcacheNWays: Int,
+                                        @arg(name = "dcacheRowBits") dcacheRowBits: Int,
+                                        @arg(name = "iCacheNSets") iCacheNSets: Int,
+                                        @arg(name = "iCacheNWays") iCacheNWays: Int,
+                                        @arg(name = "iCachePrefetch") iCachePrefetch: Boolean,
+                                        @arg(name = "dLen") dLen: Int,
+                                        @arg(name = "vrfBankSize") vrfBankSize: Int,
+                                        @arg(name = "vrfRamType") vrfRamType: RamType
+                                      ) {
+    // Forward every command-line field, by name, to the serializable tile parameter.
+    def convert: T1RocketTileParameter = T1RocketTileParameter(
+      instructionSets = instructionSets,
+      cacheBlockBytes = cacheBlockBytes,
+      nPMPs = nPMPs,
+      cacheable = cacheable, sideEffects = sideEffects,
+      dcacheNSets = dcacheNSets,
+      dcacheNWays = dcacheNWays,
+      dcacheRowBits = dcacheRowBits,
+      iCacheNSets = iCacheNSets,
+      iCacheNWays = iCacheNWays,
+      iCachePrefetch = iCachePrefetch,
+      dLen = dLen,
+      vrfBankSize = vrfBankSize,
+      vrfRamType = vrfRamType
+    )
+  }
+
+  implicit def T1RocketTileParameterMainParser: ParserForClass[T1RocketTileParameterMain] =
+    ParserForClass[T1RocketTileParameterMain]
+
+  @main // entry point: converts the parsed CLI parameter and hands it to configImpl
+  def config(@arg(name = "parameter") parameter: T1RocketTileParameterMain) = configImpl(parameter.convert)
+
+  @main // entry point: hands a parameter path and the run-firtool flag to designImpl
+  def design(@arg(name = "parameter") parameter: os.Path, @arg(name = "run-firtool") runFirtool: mainargs.Flag) =
+    designImpl[T1RocketTile, T1RocketTileParameter](parameter, runFirtool.value)
+
+  def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) // dispatches to the @main methods of this object
+}
diff --git a/rocketv/src/Bundle.scala b/rocketv/src/Bundle.scala
index 2d9950261b..eb60ae0c7f 100644
--- a/rocketv/src/Bundle.scala
+++ b/rocketv/src/Bundle.scala
@@ -5,7 +5,7 @@
 package org.chipsalliance.rocketv
 
 import chisel3._
-import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, isPow2, log2Ceil}
+import chisel3.util.{Cat, Decoupled, DecoupledIO, Valid, ValidIO, isPow2, log2Ceil}
 
 // This file defines Bundle shared in the project.
 // all Bundle only have datatype without any helper or functions, while they only exist in the companion Bundle.
@@ -1398,3 +1398,44 @@ class FrontendBundle(vaddrBitsExtended: Int, vaddrBits: Int, asidBits: Int, entr
   val ptw = new TLBPTWIO(nPMPs, vpnBits, paddrBits, vaddrBits, pgLevels, xLen, maxPAddrBits, pgIdxBits)
   val errors = new ICacheErrors(hasCorrectable, hasUncorrectable, paddrBits)
 }
+
+// Interface between T1 <> Rocket integration
+class RocketCoreToT1(xLen: Int, vlWidth: Int) extends Bundle {
+  val issue: DecoupledIO[T1Issue] = Decoupled(new T1Issue(xLen, vlWidth)) // ready/valid channel carrying T1Issue payloads
+  val retire: T1Retire = Flipped(new T1Retire(xLen)) // retirement channels, direction flipped relative to `issue`
+}
+
+class T1Issue(xLen: Int, vlWidth: Int) extends Bundle { // NOTE(review): vlWidth is not referenced in this body; `vl` below is a fixed 32 bits
+  val instruction: UInt = UInt(32.W) // 32-bit instruction word
+  val rs1Data: UInt = UInt(xLen.W) // xLen-wide operand
+  val rs2Data: UInt = UInt(xLen.W) // xLen-wide operand
+  val vtype: UInt = UInt(32.W) // sliced by the T1Issue companion accessors vlmul / vsew / vta / vma
+  val vl:    UInt = UInt(32.W)
+  val vstart: UInt = UInt(32.W)
+  val vcsr: UInt = UInt(32.W) // bits 2..1 are read by T1Issue.vxrm
+}
+
+object T1Issue { // bit-field accessors over the raw vtype / vcsr words of a T1Issue
+  def vlmul(issue: T1Issue): UInt = issue.vtype(2, 0) // vtype[2:0]
+  def vsew(issue: T1Issue): UInt = issue.vtype(5, 3) // vtype[5:3]
+  def vta(issue: T1Issue): Bool = issue.vtype(6) // vtype[6]
+  def vma(issue: T1Issue): Bool = issue.vtype(7) // vtype[7]
+  def vxrm(issue: T1Issue): UInt = issue.vcsr(2, 1) // vcsr[2:1]
+}
+
+class T1RdRetire(xLen: Int) extends Bundle { // destination-register payload of a retirement
+  val rdAddress: UInt = UInt(5.W) // 5-bit register index
+  val rdData:    UInt = UInt(xLen.W) // xLen-wide value
+  val isFp:      Bool = Bool() // distinguishes floating-point from integer destinations
+}
+
+class T1CSRRetire extends Bundle { // CSR payload of a retirement
+  val vxsat: UInt = UInt(32.W) // NOTE(review): declared 32 bits wide — confirm the intended width
+  val fflag: UInt = UInt(32.W) // NOTE(review): declared 32 bits wide — confirm the intended width
+}
+
+class T1Retire(xLen: Int) extends Bundle { // three retirement channels, each with its own valid bit
+  val rd:  Valid[T1RdRetire] = Valid(new T1RdRetire(xLen))
+  val csr: Valid[T1CSRRetire] = Valid(new T1CSRRetire)
+  val mem: Valid[Bundle] = Valid(new Bundle {}) // empty payload: only the valid bit carries information
+}
\ No newline at end of file
diff --git a/rocketv/src/RocketCore.scala b/rocketv/src/RocketCore.scala
index 7c9a9a4765..b242164335 100644
--- a/rocketv/src/RocketCore.scala
+++ b/rocketv/src/RocketCore.scala
@@ -10,7 +10,7 @@ import chisel3.experimental.{SerializableModule, SerializableModuleParameter}
 import chisel3.probe.{Probe, ProbeValue, define}
 import chisel3.util.circt.ClockGate
 import chisel3.util.experimental.decode.DecodeBundle
-import chisel3.util.{BitPat, Cat, Fill, MuxLookup, PriorityEncoder, PriorityMux, RegEnable, log2Ceil}
+import chisel3.util.{BitPat, Cat, DecoupledIO, Fill, MuxLookup, PriorityEncoder, PriorityMux, Queue, RegEnable, log2Ceil, log2Up}
 import org.chipsalliance.rocketv.rvdecoderdbcompat.Causes
 import org.chipsalliance.rvdecoderdb.Instruction
 
@@ -51,9 +51,12 @@ case class RocketParameter(
                             fastLoadByte: Boolean,
                             fastLoadWord: Boolean,
                             dcacheNSets: Int,
-                            flushOnFenceI: Boolean
+                            flushOnFenceI: Boolean,
+                            usingT1: Boolean
                           )
   extends SerializableModuleParameter {
+  // interface to T1
+  def usingVector = hasInstructionSet("rv_v")
 
   // fixed for now
   def usingRVE = false
@@ -143,8 +146,6 @@ case class RocketParameter(
   // static to false for now
   def usingNMI = hasInstructionSet("rv_smrnmi")
 
-  def usingVector = hasInstructionSet("rv_v")
-
   // calculated parameter
   def fetchWidth: Int = 1
 
@@ -317,6 +318,7 @@ class RocketInterface(parameter: RocketParameter) extends Bundle {
     )
   )
   val fpu = parameter.fLen.map(fLen => Flipped(new FPUCoreIO(parameter.hartIdLen, parameter.xLen, fLen)))
+  val t1 = Option.when(parameter.usingT1)(new RocketCoreToT1(parameter.xLen, parameter.vLen))
   val bpwatch = Output(Vec(parameter.nBreakpoints, new BPWatch))
   val cease = Output(Bool())
   val wfi = Output(Bool())
@@ -1340,62 +1342,82 @@ class Rocket(val parameter: RocketParameter)
       fpu.keep_clock_enabled := false.B
     }
 
-// @todo get back t1.
-//    t1Request.foreach { t1 =>
-//      // Send instruction to T1 when write back.
-//      t1.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
-//      t1.bits.instruction := wbRegInstruction
-//      t1.bits.rs1Data := wbRegWdata
-//      t1.bits.rs2Data := wbRegRS2
-//      val response: DecoupledIO[VectorResponse] = t1Response.get
-//      // TODO: make it configurable
-//      val maxCount: Int = 32
-//      val countWidth = log2Up(maxCount)
-//      def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = {
-//        val counter: UInt = RegInit(0.U(size.W))
-//        val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt)
-//        val updateCounter = grant ^ release
-//        when(updateCounter) {
-//          counter := nextCount
-//        }
-//        flush.foreach(f => when(f)(counter := 0.U))
-//        val empty = (updateCounter && nextCount === 0.U) || counter === 0.U
-//        val fullCounter: Int = (1 << size) - 1 - margin
-//        val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U
-//        (empty, full)
-//      }
-//      // Maintain lsu counter
-//      val lsuGrant:   Bool = t1.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
-//      val lsuRelease: Bool = response.fire && response.bits.mem
-//      val (lsuEmpty, _) = counterManagement(countWidth)(lsuGrant, lsuRelease)
-//      // Maintain vector counter
-//      // There may be 4 instructions in the pipe
-//      val (vectorEmpty, vectorFull) = counterManagement(countWidth, 4)(t1.valid, t1IssueQueueRelease.get)
-//      vectorLSUEmpty.foreach(_ := lsuEmpty)
-//      vectorQueueFull.foreach(_ := vectorFull)
-//    }
-//    // todo: vector change csr
-//    t1Response.foreach { vectorResponse =>
-//      val vectorTryToWriteRd = vectorResponse.bits.rd.valid && !vectorResponse.bits.float
-//      val vectorTryToWriteFP = vectorResponse.bits.rd.valid && vectorResponse.bits.float
-//      vectorResponse.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) &&
-//        (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
-//      when(vectorResponse.fire && vectorTryToWriteRd) {
-//        longlatencyWdata := vectorResponse.bits.data
-//        longlatencyWaddress := vectorResponse.bits.rd.bits
-//        longLatencyWenable := true.B
-//      }
-//      fpu.foreach { fpu =>
-//        when(!(dmemResponseValid && dmemResponseFpu)) {
-//          fpu.dmem_resp_val := vectorResponse.fire && vectorTryToWriteFP
-//          fpu.dmem_resp_data := vectorResponse.bits.data
-//          // todo: 32 bit only
-//          fpu.dmem_resp_type := 2.U
-//          // todo: connect tag
-//          fpu.dmem_resp_tag := 0.U
-//        }
-//      }
-//    }
+    // TODO: T1 only logic
+    io.t1.foreach { t1 =>
+      // T1 Issue
+      val maxCount: Int = 32
+      val t1IssueQueue = Module(new Queue(chiselTypeOf(t1.issue.bits), maxCount))
+      t1IssueQueue.io.enq.valid := wbRegValid && !replayWbCommon && wbRegDecodeOutput(parameter.decoderParameter.vector)
+      t1IssueQueue.io.enq.bits.instruction := wbRegInstruction
+      t1IssueQueue.io.enq.bits.rs1Data := wbRegWdata
+      t1IssueQueue.io.enq.bits.rs2Data := wbRegRS2
+      t1.issue.valid := t1IssueQueue.io.deq.valid
+      t1.issue.bits := t1IssueQueue.io.deq.bits
+      t1IssueQueue.io.deq.ready := t1.issue.ready
+      // Each kind of retirement should maintain its own scoreboard.
+      val t1CSRRetireQueue: Queue[T1CSRRetire] = Module(new Queue(chiselTypeOf(t1.retire.csr.bits), maxCount))
+      val t1XRDRetireQueue: Queue[T1RdRetire] = Module(new Queue(chiselTypeOf(t1.retire.rd.bits), maxCount))
+
+      val countWidth = log2Up(maxCount)
+      def counterManagement(size: Int, margin: Int = 0)(grant: Bool, release: Bool, flush: Option[Bool] = None) = { // size-bit up/down counter register; returns (empty, full)
+        val counter: UInt = RegInit(0.U(size.W))
+        val nextCount = counter + Mux(grant, 1.U(size.W), (-1.S(size.W)).asUInt) // +1 when grant is set, otherwise -1 (two's complement)
+        val updateCounter = grant ^ release // grant and release in the same cycle cancel out
+        when(updateCounter) {
+          counter := nextCount
+        }
+        flush.foreach(f => when(f)(counter := 0.U)) // connected last, so an asserted flush overrides the update above
+        val empty = (updateCounter && nextCount === 0.U) || counter === 0.U // also true in the cycle the count is about to reach 0
+        val fullCounter: Int = (1 << size) - 1 - margin // full threshold: maximum count minus `margin`
+        val full = (updateCounter && nextCount >= fullCounter.U) || counter >= fullCounter.U // also true in the cycle the threshold is about to be reached
+        (empty, full)
+      }
+      // T1 Memory Scoreboard
+      val t1MemoryGrant:   Bool = t1IssueQueue.io.enq.valid && wbRegDecodeOutput(parameter.decoderParameter.vectorLSU)
+      val t1MemoryRelease: Bool = t1.retire.mem.fire
+      // todo: handle vector lsu in pipe
+      // +1: There are instructions that will enter t1
+      val (lsuEmpty, _) = counterManagement(countWidth + 1)(t1MemoryGrant, t1MemoryRelease)
+      // T1 CSR Scoreboard
+      // todo: add wbRegDecodeOutput(vectorWriteCsr)
+      val t1CSRGrant:   Bool = false.B
+      val t1CSRRelease: Bool = false.B // t1CSRRetireQueue.io.deq.fire
+      val (t1CSREmpty, _) = counterManagement(countWidth + 1)(t1CSRGrant, t1CSRRelease)
+      // T1 XRD Scoreboard?
+
+      // Maintain vector counter
+      // There may be 4 instructions in the pipe
+      val (_, vectorFull) = counterManagement(countWidth, 4)(t1IssueQueue.io.enq.valid, t1.issue.fire)
+      vectorLSUEmpty.foreach(_ := lsuEmpty)
+      vectorQueueFull.foreach(_ := vectorFull)
+
+      t1XRDRetireQueue.io.enq.valid := t1.retire.rd.valid
+      t1XRDRetireQueue.io.enq.bits := t1.retire.rd.bits
+      t1CSRRetireQueue.io.enq.valid := t1.retire.csr.valid
+      t1CSRRetireQueue.io.enq.bits := t1.retire.csr.bits
+      // todo: write csr here
+      t1CSRRetireQueue.io.deq.ready := true.B
+
+      val vectorTryToWriteRd = t1XRDRetireQueue.io.deq.valid && !t1XRDRetireQueue.io.deq.bits.isFp
+      val vectorTryToWriteFP = t1XRDRetireQueue.io.deq.valid && t1XRDRetireQueue.io.deq.bits.isFp
+      t1XRDRetireQueue.io.deq.ready := (!(wbWxd || (dmemResponseReplay && dmemResponseXpu)) || !vectorTryToWriteRd) && (!(dmemResponseReplay && dmemResponseFpu) || !vectorTryToWriteFP)
+
+      // Write back from the head of the retire queue (the side whose ready is arbitrated above), not from the raw T1 port.
+      when(t1XRDRetireQueue.io.deq.fire && vectorTryToWriteRd) {
+        longlatencyWdata := t1XRDRetireQueue.io.deq.bits.rdData
+        longlatencyWaddress := t1XRDRetireQueue.io.deq.bits.rdAddress
+        longLatencyWenable := true.B
+      }
+      io.fpu.foreach { fpu =>
+        when(!(dmemResponseValid && dmemResponseFpu)) {
+          // FP results come from the same dequeued rd entry; the mem retire channel has an empty payload.
+          fpu.dmem_resp_val := t1XRDRetireQueue.io.deq.fire && vectorTryToWriteFP
+          fpu.dmem_resp_data := t1XRDRetireQueue.io.deq.bits.rdData
+          fpu.dmem_resp_type := 2.U // todo: 32 bit only
+          fpu.dmem_resp_tag := 0.U // todo: connect tag
+        }
+      }
+    }
 
     io.dmem.req.valid := exRegValid && exRegDecodeOutput(parameter.decoderParameter.mem)
     val ex_dcache_tag = Cat(exWaddr, Option.when(usingFPU)(exRegDecodeOutput(parameter.decoderParameter.fp)).getOrElse(false.B))
diff --git a/rocketv/src/RocketTile.scala b/rocketv/src/RocketTile.scala
index b70ecaaa3e..2a8ce3d8c6 100644
--- a/rocketv/src/RocketTile.scala
+++ b/rocketv/src/RocketTile.scala
@@ -244,7 +244,8 @@ case class RocketTileParameter(
     fastLoadByte,
     fastLoadWord,
     dcacheNSets,
-    flushOnFenceI
+    flushOnFenceI,
+    usingT1 = false
   )
 
   def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter(
diff --git a/t1rocketv/src/T1RocketTile.scala b/t1rocketv/src/T1RocketTile.scala
new file mode 100644
index 0000000000..452f760841
--- /dev/null
+++ b/t1rocketv/src/T1RocketTile.scala
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: 2024 Jiuyang Liu <liu@jiuyang.me>
+package org.chipsalliance.t1.tile
+
+import chisel3._
+import chisel3.experimental.hierarchy.{Instance, Instantiate}
+import chisel3.experimental.{SerializableModule, SerializableModuleGenerator, SerializableModuleParameter}
+import chisel3.util.experimental.BitSet
+import chisel3.util.log2Ceil
+import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4ROIrrevocable, AXI4RWIrrevocable}
+import org.chipsalliance.rocketv.{BHTParameter, FPU, FPUParameter, Frontend, FrontendParameter, HellaCache, HellaCacheArbiter, HellaCacheArbiterParameter, HellaCacheParameter, PTW, PTWParameter, Rocket, RocketParameter, RocketTileParameter}
+import org.chipsalliance.rvdecoderdb.Instruction
+import org.chipsalliance.t1.rtl.decoder.T1CustomInstruction
+import org.chipsalliance.t1.rtl.vrf.RamType
+import org.chipsalliance.t1.rtl.vrf.RamType.{p0rp1w, p0rw, p0rwp1rw}
+import org.chipsalliance.t1.rtl.{LaneAdder, LaneAdderParam, LaneDiv, LaneDivFP, LaneDivFPParam, LaneDivParam, LaneFloat, LaneFloatParam, LaneMul, LaneMulParam, LaneShifter, LaneShifterParameter, LogicParam, MaskedLogic, OtherUnit, OtherUnitParam, T1, T1Parameter, VFUInstantiateParameter}
+
+object T1RocketTileParameter { // upickle ReadWriters used to (de)serialize T1RocketTileParameter
+  implicit def bitSetP: upickle.default.ReadWriter[BitSet] = upickle.default
+    .readwriter[String]
+    .bimap[BitSet](
+      bs => bs.terms.map("b" + _.rawString).mkString("\n"), // written as one "b..." pattern per line
+      str => if (str.isEmpty) BitSet.empty else BitSet.fromString(str) // the empty string reads back as the empty set
+    )
+
+  implicit val vrfRamTypeP: upickle.default.ReadWriter[RamType] = upickle.default.ReadWriter.merge( // one ReadWriter per RamType object, merged
+    upickle.default.macroRW[p0rw.type],
+    upickle.default.macroRW[p0rp1w.type],
+    upickle.default.macroRW[p0rwp1rw.type]
+  )
+
+  implicit def rwP: upickle.default.ReadWriter[T1RocketTileParameter] = upickle.default.macroRW[T1RocketTileParameter]
+}
+
+case class T1RocketTileParameter(
+                                  instructionSets: Seq[String],
+                                  cacheBlockBytes: Int,
+                                  nPMPs: Int,
+                                  cacheable: BitSet,
+                                  sideEffects: BitSet,
+                                  dcacheNSets: Int,
+                                  dcacheNWays: Int,
+                                  dcacheRowBits: Int,
+                                  iCacheNSets: Int,
+                                  iCacheNWays: Int,
+                                  iCachePrefetch: Boolean,
+                                  dLen: Int,
+                                  vrfBankSize: Int,
+                                  vrfRamType: RamType)
+  extends SerializableModuleParameter {
+  require(instructionSets.count(Seq("Zve32x", "Zve32f").contains) == 1, "exactly one of Zve32x or Zve32f must be specified") // count == 1: the two extensions are mutually exclusive here
+
+  val useAsyncReset: Boolean = false
+  val clockGate: Boolean = false
+
+  val paddrBits: Int = xLen
+  // TODO: add S in the future
+  val priv: String = "m"
+  val hartIdLen: Int = 1
+  val useBPWatch: Boolean = false
+  val mcontextWidth: Int = 0
+  val scontextWidth: Int = 0
+  val asidBits: Int = 0
+  val resetVectorBits: Int = paddrBits
+  val nBreakpoints: Int = 0
+  // TODO: set to 0
+  val dtlbNSets: Int = 1
+  val dtlbNWays: Int = 32
+  val itlbNSets: Int = 1
+  val itlbNWays: Int = 32
+  val itlbNSectors: Int = 4
+  val itlbNSuperpageEntries: Int = 4
+  val nPTECacheEntries: Int = 9
+  val nL2TLBWays: Int = 1
+  val nL2TLBEntries: Int = 0
+  // T1 doesn't check exceptions: every attribute below covers the whole physical address space.
+  val legal: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits) // BigInt shift: an Int `1 << 32` (or `1 << 64`) wraps around to 1
+  val read: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val write: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val putPartial: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val logic: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val arithmetic: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val exec: BitSet = BitSet.fromRange(0, BigInt(1) << paddrBits)
+  val btbEntries: Int = 28
+  val btbNMatchBits: Int = 14
+  val btbUpdatesOutOfOrder: Boolean = false
+  val nPages: Int = 6
+  val nRAS: Int = 6
+  val bhtParameter: Option[BHTParameter] = Some(BHTParameter(nEntries = 512, counterLength = 1, historyLength = 8, historyBits = 3))
+  // TODO: remove it
+  val mulDivLatency: Int = 0
+  val divUnroll: Int = 1
+  val divEarlyOut: Boolean = false
+  val divEarlyOutGranularity: Int = 1
+  val mulUnroll: Int = 1
+  val mulEarlyOut: Boolean = false
+  val sfmaLatency: Int = 3
+  val dfmaLatency: Int = 4
+  val divSqrt: Boolean = true
+  // TODO: check decoder
+  val flushOnFenceI: Boolean = true
+  val fastLoadByte: Boolean = false
+  val fastLoadWord: Boolean = true
+  val maxUncachedInFlight: Int = 1
+  val separateUncachedResp: Boolean = false
+
+
+  // calculate
+  def usingUser: Boolean = priv.contains("u")
+
+  def usingSupervisor: Boolean = priv.contains("s")
+
+  def vLen: Int = instructionSets.collectFirst { // the number N taken from the first "zvl<N>b" entry
+    case s"zvl${vlen}b" => vlen.toInt
+  }.getOrElse(throw new NoSuchElementException("no zvl<N>b entry found in instructionSets, cannot derive vLen"))
+
+  // static for now
+  def hasBeu: Boolean = false
+
+  def usingNMI: Boolean = false
+
+  def usingHypervisor: Boolean = false
+
+  def usingDataScratchpad: Boolean = false
+
+  def nLocalInterrupts: Int = 0
+
+  def dcacheArbPorts: Int = 2
+
+  def tagECC: Option[String] = None
+
+  def dataECC: Option[String] = None
+
+  def pgLevelBits: Int = 10 - log2Ceil(xLen / 32)
+
+  def instructions: Seq[Instruction] =
+    org.chipsalliance.rvdecoderdb
+      .instructions(
+        org.chipsalliance.rvdecoderdb.extractResource(getClass.getClassLoader)
+      )
+      .filter(instruction =>
+        (
+          instructionSets ++
+            // Four mandatory instruction sets.
+            Seq("rv_i", "rv_zicsr", "rv_zifencei", "rv_system")
+          ).contains(instruction.instructionSet.name)
+      )
+      .toSeq
+      .filter {
+        // special case for rv32 pseudo from rv64
+        case i if i.pseudoFrom.isDefined && Seq("slli", "srli", "srai").contains(i.name) => true
+        case i if i.pseudoFrom.isDefined => false
+        case _ => true
+      }
+      .sortBy(i => (i.instructionSet.name, i.name))
+
+  private def hasInstructionSet(setName: String): Boolean =
+    instructions.flatMap(_.instructionSets.map(_.name)).contains(setName)
+
+  def usingBTB: Boolean = btbEntries > 0
+
+  def xLen: Int =
+    (hasInstructionSet("rv32_i"), hasInstructionSet("rv64_i")) match {
+      case (true, true) => throw new Exception("cannot support both rv32 and rv64 together")
+      case (true, false) => 32
+      case (false, true) => 64
+      case (false, false) => throw new Exception("no basic instruction found.")
+    }
+
+  def fLen: Option[Int] =
+    (
+      hasInstructionSet("rv_f") || hasInstructionSet("rv64_f"),
+      hasInstructionSet("rv_d") || hasInstructionSet("rv64_d")
+    ) match {
+      case (false, false) => None
+      case (true, false) => Some(32)
+      case (false, true) => Some(64)
+      case (true, true) => Some(64)
+    }
+
+  def usingVM = hasInstructionSet("sfence.vma") // NOTE(review): "sfence.vma" looks like an instruction mnemonic, while hasInstructionSet compares instruction-set names — confirm this can ever be true
+
+  def pgLevels: Int = xLen match {
+    case 32 => 2
+    case 64 => 3
+  }
+
+  def usingAtomics = hasInstructionSet("rv_a") || hasInstructionSet("rv64_a")
+
+  def usingCompressed = hasInstructionSet("rv_c")
+
+  def minFLen: Option[Int] =
+    if (hasInstructionSet("rv_zfh") || hasInstructionSet("rv64_zfh") || hasInstructionSet("rv_d_zfh"))
+      Some(16)
+    else
+      fLen
+
+  def rocketParameter: RocketParameter = RocketParameter(
+    useAsyncReset,
+    clockGate,
+    instructionSets.toSet,
+    vLen,
+    usingUser,
+    hartIdLen,
+    nPMPs,
+    asidBits,
+    nBreakpoints,
+    usingBTB,
+    useBPWatch,
+    mcontextWidth,
+    scontextWidth,
+    mulDivLatency,
+    divUnroll,
+    divEarlyOut,
+    divEarlyOutGranularity,
+    mulUnroll,
+    mulEarlyOut,
+    paddrBits,
+    cacheBlockBytes,
+    hasBeu,
+    fastLoadByte,
+    fastLoadWord,
+    dcacheNSets,
+    flushOnFenceI,
+    usingT1 = true
+  )
+
+  def hellaCacheParameter: HellaCacheParameter = HellaCacheParameter(
+    useAsyncReset: Boolean,
+    clockGate: Boolean,
+    xLen: Int,
+    fLen.getOrElse(0): Int,
+    usingVM: Boolean,
+    paddrBits: Int,
+    cacheBlockBytes: Int,
+    dcacheNWays: Int,
+    dcacheNSets: Int,
+    dcacheRowBits: Int,
+    dtlbNSets: Int,
+    dtlbNWays: Int,
+    tagECC: Option[String],
+    dataECC: Option[String],
+    maxUncachedInFlight: Int,
+    separateUncachedResp: Boolean,
+    legal: BitSet,
+    cacheable: BitSet,
+    read: BitSet,
+    write: BitSet,
+    putPartial: BitSet,
+    logic: BitSet,
+    arithmetic: BitSet,
+    exec: BitSet,
+    sideEffects: BitSet
+  )
+
+  def hellaCacheArbiterParameter: HellaCacheArbiterParameter = HellaCacheArbiterParameter(
+    useAsyncReset: Boolean,
+    xLen: Int,
+    fLen.getOrElse(0): Int,
+    paddrBits: Int,
+    cacheBlockBytes: Int,
+    dcacheNSets: Int,
+    usingVM: Boolean,
+    separateUncachedResp: Boolean
+  )
+
+  def ptwParameter: PTWParameter = PTWParameter(
+    useAsyncReset: Boolean,
+    clockGate: Boolean,
+    usingVM: Boolean,
+    usingHypervisor: Boolean,
+    xLen: Int,
+    fLen.getOrElse(0): Int,
+    paddrBits: Int,
+    asidBits: Int,
+    pgLevels: Int,
+    nPTECacheEntries: Int,
+    nL2TLBWays: Int,
+    nL2TLBEntries: Int,
+    nPMPs: Int
+  )
+
+  def frontendParameter: FrontendParameter = FrontendParameter(
+    useAsyncReset = useAsyncReset: Boolean,
+    clockGate = clockGate: Boolean,
+    xLen = xLen: Int,
+    usingAtomics = usingAtomics: Boolean,
+    usingDataScratchpad = usingDataScratchpad: Boolean,
+    usingVM = usingVM: Boolean,
+    usingCompressed = usingCompressed: Boolean,
+    usingBTB = usingBTB: Boolean,
+    itlbNSets = itlbNSets: Int,
+    itlbNWays = itlbNWays: Int,
+    itlbNSectors = itlbNSectors: Int,
+    itlbNSuperpageEntries = itlbNSuperpageEntries: Int,
+    blockBytes = cacheBlockBytes: Int,
+    iCacheNSets = iCacheNSets: Int,
+    iCacheNWays = iCacheNWays: Int,
+    iCachePrefetch = iCachePrefetch: Boolean,
+    btbEntries = btbEntries: Int,
+    btbNMatchBits = btbNMatchBits: Int,
+    btbUpdatesOutOfOrder = btbUpdatesOutOfOrder: Boolean,
+    nPages = nPages: Int,
+    nRAS = nRAS: Int,
+    nPMPs = nPMPs: Int,
+    paddrBits = paddrBits: Int,
+    pgLevels = pgLevels: Int,
+    asidBits = asidBits: Int,
+    bhtParameter = bhtParameter: Option[BHTParameter],
+    legal = legal: BitSet,
+    cacheable = cacheable: BitSet,
+    read = read: BitSet,
+    write = write: BitSet,
+    putPartial = putPartial: BitSet,
+    logic = logic: BitSet,
+    arithmetic = arithmetic: BitSet,
+    exec = exec: BitSet,
+    sideEffects = sideEffects: BitSet
+  )
+
+  def fpuParameter: Option[FPUParameter] = fLen.zip(minFLen).map {
+    case (fLen, minFLen) =>
+      FPUParameter(
+        useAsyncReset: Boolean,
+        clockGate: Boolean,
+        xLen: Int,
+        fLen: Int,
+        minFLen: Int,
+        sfmaLatency: Int,
+        dfmaLatency: Int,
+        divSqrt: Boolean,
+        hartIdLen: Int
+      )
+  }
+
+  val vfuInstantiateParameter = if (instructionSets.contains("Zve32f"))
+    VFUInstantiateParameter(
+      slotCount = 4,
+      logicModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3))
+      ),
+      aluModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3))
+      ),
+      shifterModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3))
+      ),
+      mulModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3))
+      ),
+      divModuleParameters = Seq(),
+      divfpModuleParameters =
+        Seq((SerializableModuleGenerator(classOf[LaneDivFP], LaneDivFPParam(32, 1)), Seq(0, 1, 2, 3))),
+      otherModuleParameters =
+        Seq((
+          SerializableModuleGenerator(
+            classOf[OtherUnit],
+            OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
+          ),
+          Seq(0, 1, 2, 3))),
+      floatModuleParameters =
+        Seq((SerializableModuleGenerator(classOf[LaneFloat], LaneFloatParam(32, 3)), Seq(0, 1, 2, 3)))
+    ) else
+    VFUInstantiateParameter(
+      slotCount = 4,
+      logicModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[MaskedLogic], LogicParam(32, 1)), Seq(0, 1, 2, 3))
+      ),
+      aluModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(0)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(1)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(2)),
+        (SerializableModuleGenerator(classOf[LaneAdder], LaneAdderParam(32, 1)), Seq(3))
+      ),
+      shifterModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneShifter], LaneShifterParameter(32, 1)), Seq(0, 1, 2, 3))
+      ),
+      mulModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneMul], LaneMulParam(32, 2)), Seq(0, 1, 2, 3))
+      ),
+      divModuleParameters = Seq(
+        (SerializableModuleGenerator(classOf[LaneDiv], LaneDivParam(32, 1)), Seq(0, 1, 2, 3))
+      ),
+      divfpModuleParameters = Seq(),
+      otherModuleParameters =
+        Seq((
+          SerializableModuleGenerator(
+            classOf[OtherUnit],
+            OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
+          ),
+          Seq(0, 1, 2, 3))),
+      floatModuleParameters = Seq()
+    )
+
+  /** Builds the [[T1Parameter]] that the tile passes to its `T1` instance.
+    *
+    * `vLen`, `dLen`, `vrfBankSize`, `vrfRamType` and `vfuInstantiateParameter` are forwarded unchanged;
+    * `extensions` keeps only the `Zve32x` / `Zve32f` entries of `instructionSets`.
+    */
+  def t1Parameter: T1Parameter = {
+    // Only these entries of `instructionSets` are forwarded to T1 as extensions.
+    val forwardedExtensions = Seq("Zve32x", "Zve32f")
+    T1Parameter(
+      vLen = vLen,
+      dLen = dLen,
+      extensions = instructionSets.filter(name => forwardedExtensions.contains(name)),
+      // No custom instructions are registered for now.
+      t1customInstructions = Seq(),
+      vrfBankSize = vrfBankSize,
+      vrfRamType = vrfRamType,
+      vfuInstantiateParameter = vfuInstantiateParameter
+    )
+  }
+
+  /** AXI4 parameter used for the tile's `instructionFetchAXI` port, taken from `frontendParameter`. */
+  def instructionFetchParameter: AXI4BundleParameter =
+    frontendParameter.instructionFetchParameter
+
+  /** Optional AXI4 parameter used for the tile's `itimAXI` port, taken from `frontendParameter`;
+    * the port only exists when this is defined.
+    */
+  def itimParameter: Option[AXI4BundleParameter] =
+    frontendParameter.itimParameter
+
+  /** AXI4 parameter used for the tile's `loadStoreAXI` port, taken from `hellaCacheParameter`. */
+  def loadStoreParameter: AXI4BundleParameter =
+    hellaCacheParameter.loadStoreParameter
+
+  /** Optional AXI4 parameter used for the tile's `dtimAXI` port, taken from `hellaCacheParameter`;
+    * the port only exists when this is defined.
+    */
+  def dtimParameter: Option[AXI4BundleParameter] =
+    hellaCacheParameter.dtimParameter
+
+  /** AXI4 parameter used for the tile's `highBandwidthAXI` port: T1's own `axi4BundleParameter`, unmodified. */
+  def t1HighBandwidthParameter: AXI4BundleParameter =
+    t1Parameter.axi4BundleParameter
+
+  /** AXI4 parameter used for the tile's `highOutstandingAXI` port: T1's `axi4BundleParameter`
+    * with the data width overridden to 32.
+    */
+  def t1HightOutstandingParameter: AXI4BundleParameter = {
+    val t1AxiParameter = t1Parameter.axi4BundleParameter
+    t1AxiParameter.copy(dataWidth = 32)
+  }
+}
+
+/** IO bundle of the T1 Rocket tile, shaped by a [[T1RocketTileParameter]].
+  *
+  * Carries clock/reset, the hart id and reset vector, the interrupt inputs, the `wfi`/`halt` status outputs
+  * and the AXI4 ports of the frontend, the data cache and T1. Members typed as `Option` only exist when the
+  * corresponding parameter enables them.
+  *
+  * Every interrupt-related port is declared with an explicit `Input`: a field left without a direction would
+  * elaborate as an output of the tile, although the tile only reads these signals to drive the core.
+  *
+  * @param parameter tile configuration that decides port widths and which optional ports exist
+  */
+class T1RocketTileInterface(parameter: T1RocketTileParameter) extends Bundle {
+  val clock = Input(Clock())
+  // Reset type follows the tile configuration: asynchronous when `useAsyncReset` is set, synchronous otherwise.
+  val reset = Input(if (parameter.useAsyncReset) AsyncReset() else Bool())
+  // todo: Const
+  val hartid = Flipped(UInt(parameter.hartIdLen.W))
+  val resetVector = Input(Const(UInt(parameter.resetVectorBits.W)))
+
+  // Interrupt inputs, forwarded to the core's `interrupts` bundle by the tile.
+  val debug: Bool = Input(Bool())
+  val mtip:  Bool = Input(Bool())
+  val msip:  Bool = Input(Bool())
+  val meip:  Bool = Input(Bool())
+  // Only present when supervisor mode is enabled.
+  val seip:  Option[Bool] = Option.when(parameter.usingSupervisor)(Input(Bool()))
+  // One bit per local interrupt; empty when `nLocalInterrupts` is 0.
+  val lip:   Vec[Bool] = Input(Vec(parameter.nLocalInterrupts, Bool()))
+  // NMI request and its two vectors only exist when NMI support is enabled.
+  val nmi: Option[Bool] = Option.when(parameter.usingNMI)(Input(Bool()))
+  val nmiInterruptVector: Option[UInt] =
+    Option.when(parameter.usingNMI)(Input(UInt(parameter.resetVectorBits.W)))
+  // NOTE: the name is misspelled ("Ixception"); it is kept because T1RocketTile refers to it by this name.
+  val nmiIxceptionVector: Option[UInt] =
+    Option.when(parameter.usingNMI)(Input(UInt(parameter.resetVectorBits.W)))
+  // TODO: buserror should be handled by NMI
+  val buserror: Bool = Input(Bool())
+  val wfi:      Bool = Output(Bool())
+  val halt:     Bool = Output(Bool())
+
+  // Frontend ports: read-only instruction fetch, plus an optional ITIM port that is flipped relative to it.
+  val instructionFetchAXI: AXI4ROIrrevocable =
+    org.chipsalliance.amba.axi4.bundle.AXI4ROIrrevocable(parameter.instructionFetchParameter)
+  val itimAXI: Option[AXI4RWIrrevocable] =
+    parameter.itimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p)))
+
+  // Data-cache ports: read/write load-store, plus an optional DTIM port that is flipped relative to it.
+  val loadStoreAXI: AXI4RWIrrevocable =
+    org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.loadStoreParameter)
+  val dtimAXI: Option[AXI4RWIrrevocable] =
+    parameter.dtimParameter.map(p => Flipped(org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(p)))
+
+  // T1 memory ports: the tile connects these to T1's `highBandwidthLoadStorePort` and `indexedLoadStorePort`.
+  val highBandwidthAXI: AXI4RWIrrevocable =
+    org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HighBandwidthParameter)
+  val highOutstandingAXI: AXI4RWIrrevocable =
+    org.chipsalliance.amba.axi4.bundle.AXI4RWIrrevocable(parameter.t1HightOutstandingParameter)
+}
+
+/** Tile that instantiates a Rocket core, its frontend, data cache (behind an arbiter), PTW, an optional FPU
+  * and a T1 instance, and wires them to each other and to the [[T1RocketTileInterface]] ports.
+  *
+  * The module is a raw module: every sub-module receives `io.clock` / `io.reset` explicitly.
+  *
+  * @param parameter tile configuration from which every sub-module parameter is derived
+  */
+class T1RocketTile(val parameter: T1RocketTileParameter)
+    extends FixedIORawModule(new T1RocketTileInterface(parameter))
+    with SerializableModule[T1RocketTileParameter] {
+  // Sub-module instances.
+  val rocket:     Instance[Rocket] = Instantiate(new Rocket(parameter.rocketParameter))
+  val frontend:   Instance[Frontend] = Instantiate(new Frontend(parameter.frontendParameter))
+  val hellaCache: Instance[HellaCache] = Instantiate(new HellaCache(parameter.hellaCacheParameter))
+  val hellaCacheArbiter: Instance[HellaCacheArbiter] =
+    Instantiate(new HellaCacheArbiter(parameter.hellaCacheArbiterParameter))
+  val ptw: Instance[PTW] = Instantiate(new PTW(parameter.ptwParameter))
+  // The FPU only exists when an FPU parameter is configured.
+  val fpu: Option[Instance[FPU]] = parameter.fpuParameter.map(p => Instantiate(new FPU(p)))
+  val t1: Instance[T1] = Instantiate(new T1(parameter.t1Parameter))
+
+  // Clock and reset fan-out to every sub-module.
+  rocket.io.clock := io.clock
+  rocket.io.reset := io.reset
+  frontend.io.clock := io.clock
+  frontend.io.reset := io.reset
+  hellaCache.io.clock := io.clock
+  hellaCache.io.reset := io.reset
+  hellaCacheArbiter.io.clock := io.clock
+  hellaCacheArbiter.io.reset := io.reset
+  ptw.io.clock := io.clock
+  ptw.io.reset := io.reset
+  fpu.foreach { unit =>
+    unit.io.clock := io.clock
+    unit.io.reset := io.reset
+    // @todo: remove it from FPU.
+    unit.io.cp_req <> DontCare
+    unit.io.cp_resp <> DontCare
+  }
+  t1.io.clock := io.clock
+  t1.io.reset := io.reset
+
+  // Hart identity, reset vector and interrupt inputs.
+  rocket.io.hartid := io.hartid
+  frontend.io.resetVector := io.resetVector
+  rocket.io.interrupts.debug := io.debug
+  rocket.io.interrupts.mtip := io.mtip
+  rocket.io.interrupts.msip := io.msip
+  rocket.io.interrupts.meip := io.meip
+  // The optional tile ports are unwrapped with `.get`: they must exist whenever the core exposes its side.
+  rocket.io.interrupts.seip.foreach { coreSeip => coreSeip := io.seip.get }
+  rocket.io.interrupts.lip := io.lip
+  rocket.io.interrupts.nmi.foreach { coreNmi =>
+    coreNmi.rnmi := io.nmi.get
+    coreNmi.rnmi_interrupt_vector := io.nmiInterruptVector.get
+    coreNmi.rnmi_exception_vector := io.nmiIxceptionVector.get
+  }
+  // @todo make it optional
+  rocket.io.buserror := io.buserror
+
+  // Status outputs.
+  io.wfi := rocket.io.wfi
+  // design for halt and beu, only use the halt function for now:
+  // halt is the OR of the valid bits of whichever uncorrectable-error reports exist.
+  io.halt := Seq(frontend.io.nonDiplomatic.errors.uncorrectable, hellaCache.io.errors.uncorrectable)
+    .flatMap(maybeError => maybeError.map(_.valid))
+    .foldLeft(false.B)((anyError, error) => anyError || error)
+
+  // External AXI ports.
+  io.instructionFetchAXI <> frontend.io.instructionFetchAXI
+  io.itimAXI.zip(frontend.io.itimAXI).foreach { case (tilePort, frontendPort) => tilePort <> frontendPort }
+  io.loadStoreAXI <> hellaCache.io.loadStoreAXI
+  io.dtimAXI.zip(hellaCache.io.dtimAXI).foreach { case (tilePort, cachePort) => tilePort <> cachePort }
+  io.highBandwidthAXI <> t1.io.highBandwidthLoadStorePort
+  io.highOutstandingAXI <> t1.io.indexedLoadStorePort
+
+  // Core <-> frontend / FPU / T1.
+  rocket.io.imem <> frontend.io.nonDiplomatic.cpu
+  rocket.io.fpu.zip(fpu.map(_.io.core)).foreach { case (corePort, fpuPort) => corePort <> fpuPort }
+  // `rocket.io.t1` is unwrapped with `.get`, so the core must be built with its T1 port present.
+  t1.io.issue <> rocket.io.t1.get.issue
+  rocket.io.t1.get.retire <> t1.io.retire
+  // used by trace module
+  rocket.io.bpwatch := DontCare
+  // don't use for now, this is design for report the custom cease status.
+  // rocket.io.cease
+  // it will be used in the future w/ trace support.
+  rocket.io.traceStall := false.B
+
+  // Data-cache arbiter: requestor 0 is the core's dmem, requestor 1 is the PTW's memory port.
+  hellaCacheArbiter.io.requestor(0) <> rocket.io.dmem
+  hellaCacheArbiter.io.requestor(1) <> ptw.io.mem
+  hellaCache.io.cpu <> hellaCacheArbiter.io.mem
+
+  // PTW: datapath to the core; requestor 0 is the frontend, requestor 1 is the data cache.
+  rocket.io.ptw <> ptw.io.dpath
+  ptw.io.requestor(0) <> frontend.io.nonDiplomatic.ptw
+  ptw.io.requestor(1) <> hellaCache.io.ptw
+}