Skip to content

Commit

Permalink
More metrics for RF, RS and ROB (#632)
Browse files Browse the repository at this point in the history
  • Loading branch information
tilk authored Apr 1, 2024
1 parent 6ef2f84 commit f8add3c
Show file tree
Hide file tree
Showing 19 changed files with 466 additions and 53 deletions.
2 changes: 1 addition & 1 deletion coreblocks/cache/icache.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: C
self.perf_misses = HwCounter("frontend.icache.misses")
self.perf_errors = HwCounter("frontend.icache.fetch_errors")
self.perf_flushes = HwCounter("frontend.icache.flushes")
self.req_latency = LatencyMeasurer(
self.req_latency = FIFOLatencyMeasurer(
"frontend.icache.req_latency", "Latencies of cache requests", slots_number=2, max_latency=500
)

Expand Down
29 changes: 28 additions & 1 deletion coreblocks/core_structs/rf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from amaranth import *
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from coreblocks.interface.layouts import RFLayouts
from coreblocks.params import GenParams
from transactron.lib.metrics import HwExpHistogram, TaggedLatencyMeasurer
from transactron.utils.amaranth_ext.functions import popcount
from transactron.utils.transactron_helpers import make_layout

__all__ = ["RegisterFile"]
Expand All @@ -20,9 +22,24 @@ def __init__(self, *, gen_params: GenParams):
self.write = Method(i=layouts.rf_write)
self.free = Method(i=layouts.rf_free)

self.perf_rf_valid_time = TaggedLatencyMeasurer(
"struct.rf.valid_time",
description="Distribution of time registers are valid in RF",
slots_number=2**gen_params.phys_regs_bits,
max_latency=1000,
)
self.perf_num_valid = HwExpHistogram(
"struct.rf.num_valid",
description="Number of valid registers in RF",
bucket_count=gen_params.phys_regs_bits + 1,
sample_width=gen_params.phys_regs_bits + 1,
)

def elaborate(self, platform):
m = TModule()

m.submodules += [self.perf_rf_valid_time, self.perf_num_valid]

being_written = Signal(self.gen_params.phys_regs_bits)
written_value = Signal(self.gen_params.isa.xlen)

Expand Down Expand Up @@ -56,10 +73,20 @@ def _(reg_id: Value, reg_val: Value):
with m.If(~(zero_reg)):
m.d.sync += self.entries[reg_id].reg_val.eq(reg_val)
m.d.sync += self.entries[reg_id].valid.eq(1)
self.perf_rf_valid_time.start(m, slot=reg_id)

@def_method(m, self.free)
def _(reg_id: Value):
with m.If(reg_id != 0):
m.d.sync += self.entries[reg_id].valid.eq(0)
self.perf_rf_valid_time.stop(m, slot=reg_id)

if self.perf_num_valid.metrics_enabled():
num_valid = Signal(self.gen_params.phys_regs_bits + 1)
m.d.comb += num_valid.eq(
popcount(Cat(self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)))
)
with Transaction(name="perf").body(m):
self.perf_num_valid.add(m, num_valid)

return m
18 changes: 15 additions & 3 deletions coreblocks/core_structs/rob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from amaranth import *
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from transactron.lib.metrics import *
from coreblocks.interface.layouts import ROBLayouts
from coreblocks.params import GenParams
Expand All @@ -18,17 +18,23 @@ def __init__(self, gen_params: GenParams) -> None:
self.data = Array(Signal(layouts.internal_layout) for _ in range(2**gen_params.rob_entries_bits))
self.get_indices = Method(o=layouts.get_indices, nonexclusive=True)

self.perf_rob_wait_time = LatencyMeasurer(
self.perf_rob_wait_time = FIFOLatencyMeasurer(
"backend.rob.wait_time",
description="Distribution of time instructions spend in ROB",
slots_number=(2**gen_params.rob_entries_bits + 1),
max_latency=1000,
)
self.perf_rob_size = HwExpHistogram(
"backend.rob.size",
description="Number of instructions in ROB",
bucket_count=gen_params.rob_entries_bits + 1,
sample_width=gen_params.rob_entries_bits,
)

def elaborate(self, platform):
m = TModule()

m.submodules += [self.perf_rob_wait_time]
m.submodules += [self.perf_rob_wait_time, self.perf_rob_size]

start_idx = Signal(self.params.rob_entries_bits)
end_idx = Signal(self.params.rob_entries_bits)
Expand Down Expand Up @@ -70,4 +76,10 @@ def _(rob_id: Value, exception):
def _():
return {"start": start_idx, "end": end_idx}

if self.perf_rob_size.metrics_enabled():
rob_size = Signal(self.params.rob_entries_bits)
m.d.comb += rob_size.eq((end_idx - start_idx)[0 : self.params.rob_entries_bits])
with Transaction(name="perf").body(m):
self.perf_rob_size.add(m, rob_size)

return m
1 change: 1 addition & 0 deletions coreblocks/func_blocks/csr/csr.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def _(rob_id: Value, side_fx: Value):
return m


@dataclass(frozen=True)
class CSRBlockComponent(BlockComponentParams):
def get_module(self, gen_params: GenParams) -> FuncBlock:
connections = gen_params.get(DependencyManager)
Expand Down
32 changes: 30 additions & 2 deletions coreblocks/func_blocks/fu/common/rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,25 @@
from typing import Optional
from amaranth import *
from amaranth.lib.coding import PriorityEncoder
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from coreblocks.params import GenParams
from coreblocks.frontend.decoder import OpType
from coreblocks.interface.layouts import RSLayouts
from transactron.lib.metrics import HwExpHistogram, TaggedLatencyMeasurer
from transactron.utils import RecordDict
from transactron.utils.amaranth_ext.functions import popcount
from transactron.utils.transactron_helpers import make_layout

__all__ = ["RS"]


class RS(Elaboratable):
def __init__(
self, gen_params: GenParams, rs_entries: int, ready_for: Optional[Iterable[Iterable[OpType]]] = None
self,
gen_params: GenParams,
rs_entries: int,
rs_number: int,
ready_for: Optional[Iterable[Iterable[OpType]]] = None,
) -> None:
ready_for = ready_for or ((op for op in OpType),)
self.gen_params = gen_params
Expand All @@ -38,10 +44,24 @@ def __init__(
self.data = Array(Signal(self.internal_layout) for _ in range(self.rs_entries))
self.data_ready = Signal(self.rs_entries)

self.perf_rs_wait_time = TaggedLatencyMeasurer(
f"fu.block_{rs_number}.rs.valid_time",
description=f"Distribution of time instructions wait in RS {rs_number}",
slots_number=2**self.rs_entries_bits,
max_latency=1000,
)
self.perf_num_full = HwExpHistogram(
f"fu.block_{rs_number}.rs.num_full",
description=f"Number of full entries in RS {rs_number}",
bucket_count=self.rs_entries_bits + 1,
sample_width=self.rs_entries_bits + 1,
)

def elaborate(self, platform):
m = TModule()

m.submodules.enc_select = PriorityEncoder(width=self.rs_entries)
m.submodules += [self.perf_rs_wait_time, self.perf_num_full]

for i, record in enumerate(self.data):
m.d.comb += self.data_ready[i].eq(
Expand Down Expand Up @@ -71,6 +91,7 @@ def _(rs_entry_id: Value, rs_data: Value) -> None:
m.d.sync += self.data[rs_entry_id].rs_data.eq(rs_data)
m.d.sync += self.data[rs_entry_id].rec_full.eq(1)
m.d.sync += self.data[rs_entry_id].rec_reserved.eq(1)
self.perf_rs_wait_time.start(m, slot=rs_entry_id)

@def_method(m, self.update)
def _(reg_id: Value, reg_val: Value) -> None:
Expand All @@ -89,6 +110,7 @@ def _(rs_entry_id: Value) -> RecordDict:
record = self.data[rs_entry_id]
m.d.sync += record.rec_reserved.eq(0)
m.d.sync += record.rec_full.eq(0)
self.perf_rs_wait_time.stop(m, slot=rs_entry_id)
return {
"s1_val": record.rs_data.s1_val,
"s2_val": record.rs_data.s2_val,
Expand All @@ -105,4 +127,10 @@ def _(rs_entry_id: Value) -> RecordDict:
def _() -> RecordDict:
return {"ready_list": ready_list}

if self.perf_num_full.metrics_enabled():
num_full = Signal(self.rs_entries_bits + 1)
m.d.comb += num_full.eq(popcount(Cat(self.data[entry_id].rec_full for entry_id in range(self.rs_entries))))
with Transaction(name="perf").body(m):
self.perf_num_full.add(m, num_full)

return m
13 changes: 11 additions & 2 deletions coreblocks/func_blocks/fu/common/rs_func_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class RSFuncBlock(FuncBlock, Elaboratable):
layout described by `FuncUnitLayouts`.
"""

def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int):
def __init__(
self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int, rs_number: int
):
"""
Parameters
----------
Expand All @@ -41,10 +43,13 @@ def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, s
Functional units to be used by this module.
rs_entries: int
Number of entries in RS.
rs_number: int
The number of this RS block. Used in the names and descriptions of its RS performance metrics.
"""
self.gen_params = gen_params
self.rs_entries = rs_entries
self.rs_entries_bits = (rs_entries - 1).bit_length()
self.rs_number = rs_number
self.rs_layouts = gen_params.get(RSLayouts, rs_entries_bits=self.rs_entries_bits)
self.fu_layouts = gen_params.get(FuncUnitLayouts)
self.func_units = list(func_units)
Expand All @@ -60,6 +65,7 @@ def elaborate(self, platform):
m.submodules.rs = self.rs = RS(
gen_params=self.gen_params,
rs_entries=self.rs_entries,
rs_number=self.rs_number,
ready_for=(optypes for _, optypes in self.func_units),
)

Expand Down Expand Up @@ -87,10 +93,13 @@ def elaborate(self, platform):
class RSBlockComponent(BlockComponentParams):
func_units: Collection[FunctionalComponentParams]
rs_entries: int
rs_number: int = -1 # overwritten by CoreConfiguration

def get_module(self, gen_params: GenParams) -> FuncBlock:
modules = list((u.get_module(gen_params), u.get_optypes()) for u in self.func_units)
rs_unit = RSFuncBlock(gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries)
rs_unit = RSFuncBlock(
gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries, rs_number=self.rs_number
)
return rs_unit

def get_optypes(self) -> set[OpType]:
Expand Down
2 changes: 2 additions & 0 deletions coreblocks/func_blocks/lsu/dummyLsu.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dataclasses import dataclass
from amaranth import *
from amaranth.lib.data import View

Expand Down Expand Up @@ -320,6 +321,7 @@ def _(rob_id: Value, side_fx: Value):
return m


@dataclass(frozen=True)
class LSUBlockComponent(BlockComponentParams):
def get_module(self, gen_params: GenParams) -> FuncBlock:
connections = gen_params.get(DependencyManager)
Expand Down
6 changes: 6 additions & 0 deletions coreblocks/params/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ class CoreConfiguration:
Definitions of PMAs per contiguous segments of memory.
"""

def __post_init__(self):
self.func_units_config = [
dataclasses.replace(conf, rs_number=k) if hasattr(conf, "rs_number") else conf
for k, conf in enumerate(self.func_units_config)
]

xlen: int = 32
func_units_config: Collection[BlockComponentParams] = basic_configuration

Expand Down
2 changes: 2 additions & 0 deletions coreblocks/params/fu_params.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import abstractmethod, ABC
from dataclasses import dataclass
from collections.abc import Collection, Iterable

from coreblocks.func_blocks.interface.func_protocols import FuncBlock, FuncUnit
Expand All @@ -20,6 +21,7 @@
]


@dataclass(frozen=True)
class BlockComponentParams(ABC):
@abstractmethod
def get_module(self, gen_params: "GenParams") -> FuncBlock:
Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/benchmark.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/benchmark

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/signature.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/signature

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/test.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/test

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/scheduler/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def setUp(self):
self.rs_count = len(self.optype_sets)
self.gen_params = GenParams(
test_core_config.replace(
func_units_config=tuple(RSBlockComponent([], rs_entries=4) for _ in range(self.rs_count))
func_units_config=tuple(RSBlockComponent([], rs_entries=4, rs_number=k) for k in range(self.rs_count))
)
)
self.expected_rename_queue = deque()
Expand Down
4 changes: 3 additions & 1 deletion test/scheduler/test_wakeup_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def elaborate(self, platform):
class TestWakeupSelect(TestCaseWithSimulator):
def setUp(self):
self.gen_params = GenParams(
test_core_config.replace(func_units_config=tuple(RSBlockComponent([], rs_entries=16) for _ in range(2)))
test_core_config.replace(
func_units_config=tuple(RSBlockComponent([], rs_entries=16, rs_number=k) for k in range(2))
)
)
self.m = WakeupTestCircuit(self.gen_params)
self.cycles = 50
Expand Down
12 changes: 6 additions & 6 deletions test/structs_common/test_rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TestRSMethodInsert(TestCaseWithSimulator):
def test_insert(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -69,7 +69,7 @@ class TestRSMethodSelect(TestCaseWithSimulator):
def test_select(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -132,7 +132,7 @@ class TestRSMethodUpdate(TestCaseWithSimulator):
def test_update(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -223,7 +223,7 @@ class TestRSMethodTake(TestCaseWithSimulator):
def test_take(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -322,7 +322,7 @@ class TestRSMethodGetReadyList(TestCaseWithSimulator):
def test_get_ready_list(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -378,7 +378,7 @@ def test_two_get_ready_lists(self):
self.rs_entries = self.gen_params.max_rs_entries
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(
RS(self.gen_params, 2**self.rs_entries_bits, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]])
RS(self.gen_params, 2**self.rs_entries_bits, 0, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]])
)
self.insert_list = [
{
Expand Down
Loading

0 comments on commit f8add3c

Please sign in to comment.