Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More metrics for RF, RS and ROB #632

Merged
merged 15 commits into from
Apr 1, 2024
2 changes: 1 addition & 1 deletion coreblocks/cache/icache.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: C
self.perf_misses = HwCounter("frontend.icache.misses")
self.perf_errors = HwCounter("frontend.icache.fetch_errors")
self.perf_flushes = HwCounter("frontend.icache.flushes")
self.req_latency = LatencyMeasurer(
self.req_latency = FIFOLatencyMeasurer(
"frontend.icache.req_latency", "Latencies of cache requests", slots_number=2, max_latency=500
)

Expand Down
29 changes: 28 additions & 1 deletion coreblocks/core_structs/rf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from amaranth import *
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from coreblocks.interface.layouts import RFLayouts
from coreblocks.params import GenParams
from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer
from transactron.utils.amaranth_ext.functions import popcount
from transactron.utils.transactron_helpers import make_layout

__all__ = ["RegisterFile"]
Expand All @@ -20,9 +22,24 @@ def __init__(self, *, gen_params: GenParams):
self.write = Method(i=layouts.rf_write)
self.free = Method(i=layouts.rf_free)

self.perf_rf_valid_time = IndexedLatencyMeasurer(
"struct.rf.valid_time",
description="Distribution of time registers are valid in RF",
slots_number=2**gen_params.phys_regs_bits,
max_latency=1000,
)
self.perf_num_valid = HwExpHistogram(
"struct.rf.num_valid",
description="Number of valid registers in RF",
bucket_count=gen_params.phys_regs_bits + 1,
sample_width=gen_params.phys_regs_bits + 1,
)

def elaborate(self, platform):
m = TModule()

m.submodules += [self.perf_rf_valid_time, self.perf_num_valid]

being_written = Signal(self.gen_params.phys_regs_bits)
written_value = Signal(self.gen_params.isa.xlen)

Expand Down Expand Up @@ -56,10 +73,20 @@ def _(reg_id: Value, reg_val: Value):
with m.If(~(zero_reg)):
m.d.sync += self.entries[reg_id].reg_val.eq(reg_val)
m.d.sync += self.entries[reg_id].valid.eq(1)
self.perf_rf_valid_time.start(m, slot=reg_id)

@def_method(m, self.free)
def _(reg_id: Value):
with m.If(reg_id != 0):
m.d.sync += self.entries[reg_id].valid.eq(0)
self.perf_rf_valid_time.stop(m, slot=reg_id)

if self.perf_num_valid.metrics_enabled():
num_valid = Signal(self.gen_params.phys_regs_bits + 1)
m.d.comb += num_valid.eq(
popcount(Cat(self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)))
)
with Transaction(name="perf").body(m):
self.perf_num_valid.add(m, num_valid)

return m
18 changes: 15 additions & 3 deletions coreblocks/core_structs/rob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from amaranth import *
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from transactron.lib.metrics import *
from coreblocks.interface.layouts import ROBLayouts
from coreblocks.params import GenParams
Expand All @@ -18,17 +18,23 @@ def __init__(self, gen_params: GenParams) -> None:
self.data = Array(Signal(layouts.internal_layout) for _ in range(2**gen_params.rob_entries_bits))
self.get_indices = Method(o=layouts.get_indices, nonexclusive=True)

self.perf_rob_wait_time = LatencyMeasurer(
self.perf_rob_wait_time = FIFOLatencyMeasurer(
"backend.rob.wait_time",
description="Distribution of time instructions spend in ROB",
slots_number=(2**gen_params.rob_entries_bits + 1),
max_latency=1000,
)
self.perf_rob_size = HwExpHistogram(
"backend.rob.size",
description="Number of instructions in ROB",
bucket_count=gen_params.rob_entries_bits + 1,
sample_width=gen_params.rob_entries_bits,
)

def elaborate(self, platform):
m = TModule()

m.submodules += [self.perf_rob_wait_time]
m.submodules += [self.perf_rob_wait_time, self.perf_rob_size]

start_idx = Signal(self.params.rob_entries_bits)
end_idx = Signal(self.params.rob_entries_bits)
Expand Down Expand Up @@ -70,4 +76,10 @@ def _(rob_id: Value, exception):
def _():
return {"start": start_idx, "end": end_idx}

if self.perf_rob_size.metrics_enabled():
rob_size = Signal(self.params.rob_entries_bits)
m.d.comb += rob_size.eq((end_idx - start_idx)[0 : self.params.rob_entries_bits])
with Transaction(name="perf").body(m):
self.perf_rob_size.add(m, rob_size)

return m
1 change: 1 addition & 0 deletions coreblocks/func_blocks/csr/csr.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def _(rob_id: Value, side_fx: Value):
return m


@dataclass(frozen=True)
class CSRBlockComponent(BlockComponentParams):
def get_module(self, gen_params: GenParams) -> FuncBlock:
connections = gen_params.get(DependencyManager)
Expand Down
32 changes: 30 additions & 2 deletions coreblocks/func_blocks/fu/common/rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,25 @@
from typing import Optional
from amaranth import *
from amaranth.lib.coding import PriorityEncoder
from transactron import Method, def_method, TModule
from transactron import Method, Transaction, def_method, TModule
from coreblocks.params import GenParams
from coreblocks.frontend.decoder import OpType
from coreblocks.interface.layouts import RSLayouts
from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer
from transactron.utils import RecordDict
from transactron.utils.amaranth_ext.functions import popcount
from transactron.utils.transactron_helpers import make_layout

__all__ = ["RS"]


class RS(Elaboratable):
def __init__(
self, gen_params: GenParams, rs_entries: int, ready_for: Optional[Iterable[Iterable[OpType]]] = None
self,
gen_params: GenParams,
rs_entries: int,
rs_number: int,
ready_for: Optional[Iterable[Iterable[OpType]]] = None,
) -> None:
ready_for = ready_for or ((op for op in OpType),)
self.gen_params = gen_params
Expand All @@ -38,10 +44,24 @@ def __init__(
self.data = Array(Signal(self.internal_layout) for _ in range(self.rs_entries))
self.data_ready = Signal(self.rs_entries)

self.perf_rs_wait_time = IndexedLatencyMeasurer(
f"fu.block_{rs_number}.rs.valid_time",
description=f"Distribution of time instructions wait in RS {rs_number}",
slots_number=2**self.rs_entries_bits,
max_latency=1000,
)
self.perf_num_full = HwExpHistogram(
f"fu.block_{rs_number}.rs.num_full",
description=f"Number of full entries in RS {rs_number}",
bucket_count=self.rs_entries_bits + 1,
sample_width=self.rs_entries_bits + 1,
)

def elaborate(self, platform):
m = TModule()

m.submodules.enc_select = PriorityEncoder(width=self.rs_entries)
m.submodules += [self.perf_rs_wait_time, self.perf_num_full]

for i, record in enumerate(self.data):
m.d.comb += self.data_ready[i].eq(
Expand Down Expand Up @@ -71,6 +91,7 @@ def _(rs_entry_id: Value, rs_data: Value) -> None:
m.d.sync += self.data[rs_entry_id].rs_data.eq(rs_data)
m.d.sync += self.data[rs_entry_id].rec_full.eq(1)
m.d.sync += self.data[rs_entry_id].rec_reserved.eq(1)
self.perf_rs_wait_time.start(m, slot=rs_entry_id)

@def_method(m, self.update)
def _(reg_id: Value, reg_val: Value) -> None:
Expand All @@ -89,6 +110,7 @@ def _(rs_entry_id: Value) -> RecordDict:
record = self.data[rs_entry_id]
m.d.sync += record.rec_reserved.eq(0)
m.d.sync += record.rec_full.eq(0)
self.perf_rs_wait_time.stop(m, slot=rs_entry_id)
return {
"s1_val": record.rs_data.s1_val,
"s2_val": record.rs_data.s2_val,
Expand All @@ -105,4 +127,10 @@ def _(rs_entry_id: Value) -> RecordDict:
def _() -> RecordDict:
return {"ready_list": ready_list}

if self.perf_num_full.metrics_enabled():
num_full = Signal(self.rs_entries_bits + 1)
m.d.comb += num_full.eq(popcount(Cat(self.data[entry_id].rec_full for entry_id in range(self.rs_entries))))
with Transaction(name="perf").body(m):
self.perf_num_full.add(m, num_full)

return m
13 changes: 11 additions & 2 deletions coreblocks/func_blocks/fu/common/rs_func_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class RSFuncBlock(FuncBlock, Elaboratable):
layout described by `FuncUnitLayouts`.
"""

def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int):
def __init__(
self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int, rs_number: int
):
"""
Parameters
----------
Expand All @@ -41,10 +43,13 @@ def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, s
Functional units to be used by this module.
rs_entries: int
Number of entries in RS.
rs_number: int
The number of this RS block. Used for debugging.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is only used for debugging, maybe it should have a default value, so that anyone who doesn't care about the debug feature doesn't have to pass it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about it, and I don't think a default value is a good idea. If rs_number is not set in a given CoreConfiguration, then the metric will be hard to read, so the person collecting the metrics will need to change the configuration anyway.

Probably it would be better to auto-generate these numbers.

"""
self.gen_params = gen_params
self.rs_entries = rs_entries
self.rs_entries_bits = (rs_entries - 1).bit_length()
self.rs_number = rs_number
self.rs_layouts = gen_params.get(RSLayouts, rs_entries_bits=self.rs_entries_bits)
self.fu_layouts = gen_params.get(FuncUnitLayouts)
self.func_units = list(func_units)
Expand All @@ -60,6 +65,7 @@ def elaborate(self, platform):
m.submodules.rs = self.rs = RS(
gen_params=self.gen_params,
rs_entries=self.rs_entries,
rs_number=self.rs_number,
ready_for=(optypes for _, optypes in self.func_units),
)

Expand Down Expand Up @@ -87,10 +93,13 @@ def elaborate(self, platform):
class RSBlockComponent(BlockComponentParams):
func_units: Collection[FunctionalComponentParams]
rs_entries: int
rs_number: int = -1 # overwritten by CoreConfiguration

def get_module(self, gen_params: GenParams) -> FuncBlock:
modules = list((u.get_module(gen_params), u.get_optypes()) for u in self.func_units)
rs_unit = RSFuncBlock(gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries)
rs_unit = RSFuncBlock(
gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries, rs_number=self.rs_number
)
return rs_unit

def get_optypes(self) -> set[OpType]:
Expand Down
2 changes: 2 additions & 0 deletions coreblocks/func_blocks/lsu/dummyLsu.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dataclasses import dataclass
from amaranth import *
from amaranth.lib.data import View

Expand Down Expand Up @@ -320,6 +321,7 @@ def _(rob_id: Value, side_fx: Value):
return m


@dataclass(frozen=True)
class LSUBlockComponent(BlockComponentParams):
def get_module(self, gen_params: GenParams) -> FuncBlock:
connections = gen_params.get(DependencyManager)
Expand Down
6 changes: 6 additions & 0 deletions coreblocks/params/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ class CoreConfiguration:
Definitions of PMAs per contiguous segments of memory.
"""

def __post_init__(self):
self.func_units_config = [
dataclasses.replace(conf, rs_number=k) if hasattr(conf, "rs_number") else conf
for k, conf in enumerate(self.func_units_config)
]

xlen: int = 32
func_units_config: Collection[BlockComponentParams] = basic_configuration

Expand Down
2 changes: 2 additions & 0 deletions coreblocks/params/fu_params.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import abstractmethod, ABC
from dataclasses import dataclass
from collections.abc import Collection, Iterable

from coreblocks.func_blocks.interface.func_protocols import FuncBlock, FuncUnit
Expand All @@ -20,6 +21,7 @@
]


@dataclass(frozen=True)
class BlockComponentParams(ABC):
@abstractmethod
def get_module(self, gen_params: "GenParams") -> FuncBlock:
Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/benchmark.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/benchmark

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/signature.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/signature

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/regression/cocotb/test.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SIM_BUILD = build/test

# Yosys/Amaranth borkedness workaround
ifeq ($(SIM),verilator)
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC
EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED
BUILD_ARGS += -j`nproc`
endif

Expand Down
2 changes: 1 addition & 1 deletion test/scheduler/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def setUp(self):
self.rs_count = len(self.optype_sets)
self.gen_params = GenParams(
test_core_config.replace(
func_units_config=tuple(RSBlockComponent([], rs_entries=4) for _ in range(self.rs_count))
func_units_config=tuple(RSBlockComponent([], rs_entries=4, rs_number=k) for k in range(self.rs_count))
)
)
self.expected_rename_queue = deque()
Expand Down
4 changes: 3 additions & 1 deletion test/scheduler/test_wakeup_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def elaborate(self, platform):
class TestWakeupSelect(TestCaseWithSimulator):
def setUp(self):
self.gen_params = GenParams(
test_core_config.replace(func_units_config=tuple(RSBlockComponent([], rs_entries=16) for _ in range(2)))
test_core_config.replace(
func_units_config=tuple(RSBlockComponent([], rs_entries=16, rs_number=k) for k in range(2))
)
)
self.m = WakeupTestCircuit(self.gen_params)
self.cycles = 50
Expand Down
12 changes: 6 additions & 6 deletions test/structs_common/test_rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TestRSMethodInsert(TestCaseWithSimulator):
def test_insert(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -69,7 +69,7 @@ class TestRSMethodSelect(TestCaseWithSimulator):
def test_select(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -132,7 +132,7 @@ class TestRSMethodUpdate(TestCaseWithSimulator):
def test_update(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -223,7 +223,7 @@ class TestRSMethodTake(TestCaseWithSimulator):
def test_take(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -322,7 +322,7 @@ class TestRSMethodGetReadyList(TestCaseWithSimulator):
def test_get_ready_list(self):
self.gen_params = GenParams(test_core_config)
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None))
self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None))
self.insert_list = [
{
"rs_entry_id": id,
Expand Down Expand Up @@ -378,7 +378,7 @@ def test_two_get_ready_lists(self):
self.rs_entries = self.gen_params.max_rs_entries
self.rs_entries_bits = self.gen_params.max_rs_entries_bits
self.m = SimpleTestCircuit(
RS(self.gen_params, 2**self.rs_entries_bits, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]])
RS(self.gen_params, 2**self.rs_entries_bits, 0, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]])
)
self.insert_list = [
{
Expand Down
Loading