From 2084d88d7bbf3bae3d2872bc5742ab7243ec7087 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Wed, 27 Mar 2024 12:05:37 +0100 Subject: [PATCH 01/14] Add metrics for structs --- coreblocks/core_structs/rf.py | 20 +++++++++++++++++- coreblocks/core_structs/rob.py | 16 ++++++++++++-- coreblocks/func_blocks/fu/common/rs.py | 21 +++++++++++++++++-- .../func_blocks/fu/common/rs_func_block.py | 9 ++++++-- coreblocks/params/configurations.py | 7 +++++-- 5 files changed, 64 insertions(+), 9 deletions(-) diff --git a/coreblocks/core_structs/rf.py b/coreblocks/core_structs/rf.py index f7a9b8a7f..b255d71a6 100644 --- a/coreblocks/core_structs/rf.py +++ b/coreblocks/core_structs/rf.py @@ -1,7 +1,10 @@ +import operator from amaranth import * -from transactron import Method, def_method, TModule +from functools import reduce +from transactron import Method, Transaction, def_method, TModule from coreblocks.interface.layouts import RFLayouts from coreblocks.params import GenParams +from transactron.lib.metrics import HwExpHistogram from transactron.utils.transactron_helpers import make_layout __all__ = ["RegisterFile"] @@ -20,8 +23,15 @@ def __init__(self, *, gen_params: GenParams): self.write = Method(i=layouts.rf_write) self.free = Method(i=layouts.rf_free) + self.perf_num_valid = HwExpHistogram( + "struct.rf.num_valid", description="Number of valid registers in RF", bucket_count=gen_params.phys_regs_bits, + sample_width=gen_params.phys_regs_bits + 1 + ) + def elaborate(self, platform): m = TModule() + + m.submodules += [self.perf_num_valid] being_written = Signal(self.gen_params.phys_regs_bits) written_value = Signal(self.gen_params.isa.xlen) @@ -62,4 +72,12 @@ def _(reg_id: Value): with m.If(reg_id != 0): m.d.sync += self.entries[reg_id].valid.eq(0) + if self.perf_num_valid.metrics_enabled(): + num_valid = Signal(self.gen_params.phys_regs_bits + 1) + m.d.comb += num_valid.eq(reduce( + operator.add, (self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)) + )) + with Transaction(name="perf").body(m): + self.perf_num_valid.add(m, num_valid) + return m diff --git a/coreblocks/core_structs/rob.py b/coreblocks/core_structs/rob.py index 1f3806d46..20b8eaff7 100644 --- a/coreblocks/core_structs/rob.py +++ b/coreblocks/core_structs/rob.py @@ -1,5 +1,5 @@ from amaranth import * -from transactron import Method, def_method, TModule +from transactron import Method, Transaction, def_method, TModule from transactron.lib.metrics import * from coreblocks.interface.layouts import ROBLayouts from coreblocks.params import GenParams @@ -24,11 +24,17 @@ def __init__(self, gen_params: GenParams) -> None: slots_number=(2**gen_params.rob_entries_bits + 1), max_latency=1000, ) + self.perf_rob_size = HwExpHistogram( + "backend.rob.size", + description="Number of instructions in ROB", + bucket_count=gen_params.rob_entries_bits, + sample_width=gen_params.rob_entries_bits + 1 + ) def elaborate(self, platform): m = TModule() - m.submodules += [self.perf_rob_wait_time] + m.submodules += [self.perf_rob_wait_time, self.perf_rob_size] start_idx = Signal(self.params.rob_entries_bits) end_idx = Signal(self.params.rob_entries_bits) @@ -70,4 +76,10 @@ def _(rob_id: Value, exception): def _(): return {"start": start_idx, "end": end_idx} + if self.perf_rob_size.metrics_enabled(): + rob_size = Signal(self.params.rob_entries_bits + 1) + m.d.comb += rob_size.eq(end_idx - start_idx) + with Transaction(name="perf").body(m): + self.perf_rob_size.add(m, rob_size) + return m diff --git a/coreblocks/func_blocks/fu/common/rs.py b/coreblocks/func_blocks/fu/common/rs.py index 56287df27..fcef4210b 100644 --- a/coreblocks/func_blocks/fu/common/rs.py +++ b/coreblocks/func_blocks/fu/common/rs.py @@ -1,11 +1,14 @@ +import operator +from functools import reduce from collections.abc import Iterable from typing import Optional from amaranth import * from amaranth.lib.coding import PriorityEncoder -from transactron import Method, def_method, TModule +from transactron import Method, Transaction, def_method, TModule from coreblocks.params import GenParams from coreblocks.frontend.decoder import OpType from coreblocks.interface.layouts import RSLayouts +from transactron.lib.metrics import HwExpHistogram from transactron.utils import RecordDict from transactron.utils.transactron_helpers import make_layout @@ -14,7 +17,7 @@ class RS(Elaboratable): def __init__( - self, gen_params: GenParams, rs_entries: int, ready_for: Optional[Iterable[Iterable[OpType]]] = None + self, gen_params: GenParams, rs_entries: int, rs_number: int, ready_for: Optional[Iterable[Iterable[OpType]]] = None ) -> None: ready_for = ready_for or ((op for op in OpType),) self.gen_params = gen_params @@ -38,10 +41,18 @@ def __init__( self.data = Array(Signal(self.internal_layout) for _ in range(self.rs_entries)) self.data_ready = Signal(self.rs_entries) + self.perf_num_full = HwExpHistogram( + f"fu.block_{rs_number}.rs.num_full", + description=f"Number of full entries in RS {rs_number}", + bucket_count=self.rs_entries_bits, + sample_width=self.rs_entries_bits + 1 + ) + def elaborate(self, platform): m = TModule() m.submodules.enc_select = PriorityEncoder(width=self.rs_entries) + m.submodules += [self.perf_num_full] for i, record in enumerate(self.data): m.d.comb += self.data_ready[i].eq( @@ -105,4 +116,10 @@ def _(rs_entry_id: Value) -> RecordDict: def _() -> RecordDict: return {"ready_list": ready_list} + if self.perf_num_full.metrics_enabled(): + num_full = Signal(self.rs_entries_bits + 1) + m.d.comb += num_full.eq(reduce(operator.add, (self.data[entry_id].rec_full for entry_id in range(self.rs_entries)))) + with Transaction(name="perf").body(m): + self.perf_num_full.add(m, num_full) + return m diff --git a/coreblocks/func_blocks/fu/common/rs_func_block.py b/coreblocks/func_blocks/fu/common/rs_func_block.py index 66fed3d0e..058995aea 100644 --- a/coreblocks/func_blocks/fu/common/rs_func_block.py +++ b/coreblocks/func_blocks/fu/common/rs_func_block.py @@ -31,7 +31,7 @@ class RSFuncBlock(FuncBlock, Elaboratable): layout described by `FuncUnitLayouts`. """ - def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int): + def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int, rs_number: int): """ Parameters ---------- @@ -41,10 +41,13 @@ def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, s Functional units to be used by this module. rs_entries: int Number of entries in RS. + rs_number: int + The number of this RS block. Used for debugging. """ self.gen_params = gen_params self.rs_entries = rs_entries self.rs_entries_bits = (rs_entries - 1).bit_length() + self.rs_number = rs_number self.rs_layouts = gen_params.get(RSLayouts, rs_entries_bits=self.rs_entries_bits) self.fu_layouts = gen_params.get(FuncUnitLayouts) self.func_units = list(func_units) @@ -60,6 +63,7 @@ def elaborate(self, platform): m.submodules.rs = self.rs = RS( gen_params=self.gen_params, rs_entries=self.rs_entries, + rs_number=self.rs_number, ready_for=(optypes for _, optypes in self.func_units), ) @@ -87,10 +91,11 @@ def elaborate(self, platform): class RSBlockComponent(BlockComponentParams): func_units: Collection[FunctionalComponentParams] rs_entries: int + rs_number: int def get_module(self, gen_params: GenParams) -> FuncBlock: modules = list((u.get_module(gen_params), u.get_optypes()) for u in self.func_units) - rs_unit = RSFuncBlock(gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries) + rs_unit = RSFuncBlock(gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries, rs_number=self.rs_number) return rs_unit def get_optypes(self) -> set[OpType]: diff --git a/coreblocks/params/configurations.py b/coreblocks/params/configurations.py index a9dee4931..8b9257a75 100644 --- a/coreblocks/params/configurations.py +++ b/coreblocks/params/configurations.py @@ -26,6 +26,7 @@ RSBlockComponent( [ALUComponent(), ShiftUnitComponent(), JumpComponent(), ExceptionUnitComponent(), PrivilegedUnitComponent()], rs_entries=4, + rs_number=0 ), LSUBlockComponent(), CSRBlockComponent(), @@ -106,7 +107,7 @@ def replace(self, **kwargs): tiny_core_config = CoreConfiguration( embedded=True, func_units_config=( - RSBlockComponent([ALUComponent(), ShiftUnitComponent(), JumpComponent()], rs_entries=2), + RSBlockComponent([ALUComponent(), ShiftUnitComponent(), JumpComponent()], rs_entries=2, rs_number=0), LSUBlockComponent(), ), phys_regs_bits=basic_core_config.phys_regs_bits - 1, @@ -128,6 +129,7 @@ def replace(self, **kwargs): PrivilegedUnitComponent(), ], rs_entries=4, + rs_number=0 ), RSBlockComponent( [ @@ -135,6 +137,7 @@ def replace(self, **kwargs): DivComponent(), ], rs_entries=2, + rs_number=1 ), LSUBlockComponent(), CSRBlockComponent(), @@ -144,7 +147,7 @@ def replace(self, **kwargs): # Core configuration used in internal testbenches test_core_config = CoreConfiguration( - func_units_config=tuple(RSBlockComponent([], rs_entries=4) for _ in range(2)), + func_units_config=tuple(RSBlockComponent([], rs_entries=4, rs_number=k) for k in range(2)), rob_entries_bits=7, phys_regs_bits=7, _implied_extensions=Extension.I, From 0afb16ded385f7c839ad533333346744a7ab765b Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Wed, 27 Mar 2024 12:43:40 +0100 Subject: [PATCH 02/14] Various fixes --- coreblocks/core_structs/rf.py | 16 ++++++++++------ coreblocks/core_structs/rob.py | 8 ++++---- coreblocks/func_blocks/fu/common/rs.py | 14 ++++++++++---- .../func_blocks/fu/common/rs_func_block.py | 8 ++++++-- coreblocks/params/configurations.py | 6 +++--- test/regression/cocotb/benchmark.Makefile | 2 +- test/regression/cocotb/signature.Makefile | 2 +- test/regression/cocotb/test.Makefile | 2 +- 8 files changed, 36 insertions(+), 22 deletions(-) diff --git a/coreblocks/core_structs/rf.py b/coreblocks/core_structs/rf.py index b255d71a6..c6599f043 100644 --- a/coreblocks/core_structs/rf.py +++ b/coreblocks/core_structs/rf.py @@ -24,13 +24,15 @@ def __init__(self, *, gen_params: GenParams): self.free = Method(i=layouts.rf_free) self.perf_num_valid = HwExpHistogram( - "struct.rf.num_valid", description="Number of valid registers in RF", bucket_count=gen_params.phys_regs_bits, - sample_width=gen_params.phys_regs_bits + 1 + "struct.rf.num_valid", + description="Number of valid registers in RF", + bucket_count=gen_params.phys_regs_bits + 1, + sample_width=gen_params.phys_regs_bits + 1, ) def elaborate(self, platform): m = TModule() - + m.submodules += [self.perf_num_valid] being_written = Signal(self.gen_params.phys_regs_bits) @@ -74,9 +76,11 @@ def _(reg_id: Value): if self.perf_num_valid.metrics_enabled(): num_valid = Signal(self.gen_params.phys_regs_bits + 1) - m.d.comb += num_valid.eq(reduce( - operator.add, (self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)) - )) + m.d.comb += num_valid.eq( + reduce( + operator.add, (self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)) + ) + ) with Transaction(name="perf").body(m): self.perf_num_valid.add(m, num_valid) diff --git a/coreblocks/core_structs/rob.py b/coreblocks/core_structs/rob.py index 20b8eaff7..be15cdb49 100644 --- a/coreblocks/core_structs/rob.py +++ b/coreblocks/core_structs/rob.py @@ -27,8 +27,8 @@ def __init__(self, gen_params: GenParams) -> None: self.perf_rob_size = HwExpHistogram( "backend.rob.size", description="Number of instructions in ROB", - bucket_count=gen_params.rob_entries_bits, - sample_width=gen_params.rob_entries_bits + 1 + bucket_count=gen_params.rob_entries_bits + 1, + sample_width=gen_params.rob_entries_bits, ) def elaborate(self, platform): @@ -77,8 +77,8 @@ def _(): return {"start": start_idx, "end": end_idx} if self.perf_rob_size.metrics_enabled(): - rob_size = Signal(self.params.rob_entries_bits + 1) - m.d.comb += rob_size.eq(end_idx - start_idx) + rob_size = Signal(self.params.rob_entries_bits) + m.d.comb += rob_size.eq((end_idx - start_idx)[0 : self.params.rob_entries_bits]) with Transaction(name="perf").body(m): self.perf_rob_size.add(m, rob_size) diff --git a/coreblocks/func_blocks/fu/common/rs.py b/coreblocks/func_blocks/fu/common/rs.py index fcef4210b..dfb9da167 100644 --- a/coreblocks/func_blocks/fu/common/rs.py +++ b/coreblocks/func_blocks/fu/common/rs.py @@ -17,7 +17,11 @@ class RS(Elaboratable): def __init__( - self, gen_params: GenParams, rs_entries: int, rs_number: int, ready_for: Optional[Iterable[Iterable[OpType]]] = None + self, + gen_params: GenParams, + rs_entries: int, + rs_number: int, + ready_for: Optional[Iterable[Iterable[OpType]]] = None, ) -> None: ready_for = ready_for or ((op for op in OpType),) self.gen_params = gen_params @@ -44,8 +48,8 @@ def __init__( self.perf_num_full = HwExpHistogram( f"fu.block_{rs_number}.rs.num_full", description=f"Number of full entries in RS {rs_number}", - bucket_count=self.rs_entries_bits, - sample_width=self.rs_entries_bits + 1 + bucket_count=self.rs_entries_bits + 1, + sample_width=self.rs_entries_bits + 1, ) def elaborate(self, platform): @@ -118,7 +122,9 @@ def _() -> RecordDict: if self.perf_num_full.metrics_enabled(): num_full = Signal(self.rs_entries_bits + 1) - m.d.comb += num_full.eq(reduce(operator.add, (self.data[entry_id].rec_full for entry_id in range(self.rs_entries)))) + m.d.comb += num_full.eq( + reduce(operator.add, (self.data[entry_id].rec_full for entry_id in range(self.rs_entries))) + ) with Transaction(name="perf").body(m): self.perf_num_full.add(m, num_full) diff --git a/coreblocks/func_blocks/fu/common/rs_func_block.py b/coreblocks/func_blocks/fu/common/rs_func_block.py index 058995aea..6345caf6b 100644 --- a/coreblocks/func_blocks/fu/common/rs_func_block.py +++ b/coreblocks/func_blocks/fu/common/rs_func_block.py @@ -31,7 +31,9 @@ class RSFuncBlock(FuncBlock, Elaboratable): layout described by `FuncUnitLayouts`. """ - def __init__(self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int, rs_number: int): + def __init__( + self, gen_params: GenParams, func_units: Iterable[tuple[FuncUnit, set[OpType]]], rs_entries: int, rs_number: int + ): """ Parameters ---------- @@ -95,7 +97,9 @@ class RSBlockComponent(BlockComponentParams): def get_module(self, gen_params: GenParams) -> FuncBlock: modules = list((u.get_module(gen_params), u.get_optypes()) for u in self.func_units) - rs_unit = RSFuncBlock(gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries, rs_number=self.rs_number) + rs_unit = RSFuncBlock( + gen_params=gen_params, func_units=modules, rs_entries=self.rs_entries, rs_number=self.rs_number + ) return rs_unit def get_optypes(self) -> set[OpType]: diff --git a/coreblocks/params/configurations.py b/coreblocks/params/configurations.py index 8b9257a75..b7fb53173 100644 --- a/coreblocks/params/configurations.py +++ b/coreblocks/params/configurations.py @@ -26,7 +26,7 @@ RSBlockComponent( [ALUComponent(), ShiftUnitComponent(), JumpComponent(), ExceptionUnitComponent(), PrivilegedUnitComponent()], rs_entries=4, - rs_number=0 + rs_number=0, ), LSUBlockComponent(), CSRBlockComponent(), @@ -129,7 +129,7 @@ def replace(self, **kwargs): PrivilegedUnitComponent(), ], rs_entries=4, - rs_number=0 + rs_number=0, ), RSBlockComponent( [ @@ -137,7 +137,7 @@ def replace(self, **kwargs): DivComponent(), ], rs_entries=2, - rs_number=1 + rs_number=1, ), LSUBlockComponent(), CSRBlockComponent(), diff --git a/test/regression/cocotb/benchmark.Makefile b/test/regression/cocotb/benchmark.Makefile index 9962315fb..e49b55b39 100644 --- a/test/regression/cocotb/benchmark.Makefile +++ b/test/regression/cocotb/benchmark.Makefile @@ -14,7 +14,7 @@ SIM_BUILD = build/benchmark # Yosys/Amaranth borkedness workaround ifeq ($(SIM),verilator) - EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC + EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED BUILD_ARGS += -j`nproc` endif diff --git a/test/regression/cocotb/signature.Makefile b/test/regression/cocotb/signature.Makefile index b4f690635..a03d0a5f8 100644 --- a/test/regression/cocotb/signature.Makefile +++ b/test/regression/cocotb/signature.Makefile @@ -14,7 +14,7 @@ SIM_BUILD = build/signature # Yosys/Amaranth borkedness workaround ifeq ($(SIM),verilator) - EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC + EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED BUILD_ARGS += -j`nproc` endif diff --git a/test/regression/cocotb/test.Makefile b/test/regression/cocotb/test.Makefile index 210618067..5b9f7aad9 100644 --- a/test/regression/cocotb/test.Makefile +++ b/test/regression/cocotb/test.Makefile @@ -14,7 +14,7 @@ SIM_BUILD = build/test # Yosys/Amaranth borkedness workaround ifeq ($(SIM),verilator) - EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC + EXTRA_ARGS += -Wno-CASEINCOMPLETE -Wno-CASEOVERLAP -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-UNSIGNED BUILD_ARGS += -j`nproc` endif From 9f5ecf8e8e605401e7eeef2f54e41d1800306bc2 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Wed, 27 Mar 2024 13:31:15 +0100 Subject: [PATCH 03/14] Towards indexed latency measurer --- coreblocks/cache/icache.py | 2 +- coreblocks/core_structs/rf.py | 12 ++- coreblocks/core_structs/rob.py | 2 +- coreblocks/func_blocks/fu/common/rs.py | 12 ++- test/scheduler/test_scheduler.py | 2 +- test/scheduler/test_wakeup_select.py | 4 +- test/structs_common/test_rs.py | 12 +-- test/transactron/test_metrics.py | 4 +- transactron/lib/metrics.py | 130 ++++++++++++++++++++++++- transactron/lib/storage.py | 77 ++++++++++++++- 10 files changed, 236 insertions(+), 21 deletions(-) diff --git a/coreblocks/cache/icache.py b/coreblocks/cache/icache.py index f94c6e07c..605e22e88 100644 --- a/coreblocks/cache/icache.py +++ b/coreblocks/cache/icache.py @@ -123,7 +123,7 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: C self.perf_misses = HwCounter("frontend.icache.misses") self.perf_errors = HwCounter("frontend.icache.fetch_errors") self.perf_flushes = HwCounter("frontend.icache.flushes") - self.req_latency = LatencyMeasurer( + self.req_latency = FIFOLatencyMeasurer( "frontend.icache.req_latency", "Latencies of cache requests", slots_number=2, max_latency=500 ) diff --git a/coreblocks/core_structs/rf.py b/coreblocks/core_structs/rf.py index c6599f043..d6bed0c9f 100644 --- a/coreblocks/core_structs/rf.py +++ b/coreblocks/core_structs/rf.py @@ -4,7 +4,7 @@ from transactron import Method, Transaction, def_method, TModule from coreblocks.interface.layouts import RFLayouts from coreblocks.params import GenParams -from transactron.lib.metrics import HwExpHistogram +from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer from transactron.utils.transactron_helpers import make_layout __all__ = ["RegisterFile"] @@ -23,6 +23,12 @@ def __init__(self, *, gen_params: GenParams): self.write = Method(i=layouts.rf_write) self.free = Method(i=layouts.rf_free) + self.perf_rf_valid_time = IndexedLatencyMeasurer( + "struct.rf.valid_time", + description="Distribution of time registers are valid in RF", + slots_number=2**gen_params.phys_regs_bits, + max_latency=1000, + ) self.perf_num_valid = HwExpHistogram( "struct.rf.num_valid", description="Number of valid registers in RF", @@ -33,7 +39,7 @@ def __init__(self, *, gen_params: GenParams): def elaborate(self, platform): m = TModule() - m.submodules += [self.perf_num_valid] + m.submodules += [self.perf_rf_valid_time, self.perf_num_valid] being_written = Signal(self.gen_params.phys_regs_bits) written_value = Signal(self.gen_params.isa.xlen) @@ -68,11 +74,13 @@ def _(reg_id: Value, reg_val: Value): with m.If(~(zero_reg)): m.d.sync += self.entries[reg_id].reg_val.eq(reg_val) m.d.sync += self.entries[reg_id].valid.eq(1) + self.perf_rf_valid_time.start(m, slot=reg_id) @def_method(m, self.free) def _(reg_id: Value): with m.If(reg_id != 0): m.d.sync += self.entries[reg_id].valid.eq(0) + self.perf_rf_valid_time.stop(m, slot=reg_id) if self.perf_num_valid.metrics_enabled(): num_valid = Signal(self.gen_params.phys_regs_bits + 1) diff --git a/coreblocks/core_structs/rob.py b/coreblocks/core_structs/rob.py index be15cdb49..25b14bab3 100644 --- a/coreblocks/core_structs/rob.py +++ b/coreblocks/core_structs/rob.py @@ -18,7 +18,7 @@ def __init__(self, gen_params: GenParams) -> None: self.data = Array(Signal(layouts.internal_layout) for _ in range(2**gen_params.rob_entries_bits)) self.get_indices = Method(o=layouts.get_indices, nonexclusive=True) - self.perf_rob_wait_time = LatencyMeasurer( + self.perf_rob_wait_time = FIFOLatencyMeasurer( "backend.rob.wait_time", description="Distribution of time instructions spend in ROB", slots_number=(2**gen_params.rob_entries_bits + 1), diff --git a/coreblocks/func_blocks/fu/common/rs.py b/coreblocks/func_blocks/fu/common/rs.py index dfb9da167..3c05c59f6 100644 --- a/coreblocks/func_blocks/fu/common/rs.py +++ b/coreblocks/func_blocks/fu/common/rs.py @@ -8,7 +8,7 @@ from coreblocks.params import GenParams from coreblocks.frontend.decoder import OpType from coreblocks.interface.layouts import RSLayouts -from transactron.lib.metrics import HwExpHistogram +from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer from transactron.utils import RecordDict from transactron.utils.transactron_helpers import make_layout @@ -45,6 +45,12 @@ def __init__( self.data = Array(Signal(self.internal_layout) for _ in range(self.rs_entries)) self.data_ready = Signal(self.rs_entries) + self.perf_rs_wait_time = IndexedLatencyMeasurer( + f"fu.block_{rs_number}.rs.valid_time", + description=f"Distribution of time instructions wait in RS {rs_number}", + slots_number=2**self.rs_entries_bits, + max_latency=1000, + ) self.perf_num_full = HwExpHistogram( f"fu.block_{rs_number}.rs.num_full", description=f"Number of full entries in RS {rs_number}", @@ -56,7 +62,7 @@ def elaborate(self, platform): m = TModule() m.submodules.enc_select = PriorityEncoder(width=self.rs_entries) - m.submodules += [self.perf_num_full] + m.submodules += [self.perf_rs_wait_time, self.perf_num_full] for i, record in enumerate(self.data): m.d.comb += self.data_ready[i].eq( @@ -86,6 +92,7 @@ def _(rs_entry_id: Value, rs_data: Value) -> None: m.d.sync += self.data[rs_entry_id].rs_data.eq(rs_data) m.d.sync += self.data[rs_entry_id].rec_full.eq(1) m.d.sync += self.data[rs_entry_id].rec_reserved.eq(1) + self.perf_rs_wait_time.start(m, slot=rs_entry_id) @def_method(m, self.update) def _(reg_id: Value, reg_val: Value) -> None: @@ -104,6 +111,7 @@ def _(rs_entry_id: Value) -> RecordDict: record = self.data[rs_entry_id] m.d.sync += record.rec_reserved.eq(0) m.d.sync += record.rec_full.eq(0) + self.perf_rs_wait_time.stop(m, slot=rs_entry_id) return { "s1_val": record.rs_data.s1_val, "s2_val": record.rs_data.s2_val, diff --git a/test/scheduler/test_scheduler.py b/test/scheduler/test_scheduler.py index 3c50efab6..2fcf54a50 100644 --- a/test/scheduler/test_scheduler.py +++ b/test/scheduler/test_scheduler.py @@ -127,7 +127,7 @@ def setUp(self): self.rs_count = len(self.optype_sets) self.gen_params = GenParams( test_core_config.replace( - func_units_config=tuple(RSBlockComponent([], rs_entries=4) for _ in range(self.rs_count)) + func_units_config=tuple(RSBlockComponent([], rs_entries=4, rs_number=k) for k in range(self.rs_count)) ) ) self.expected_rename_queue = deque() diff --git a/test/scheduler/test_wakeup_select.py b/test/scheduler/test_wakeup_select.py index 4ff298da9..3e406e1af 100644 --- a/test/scheduler/test_wakeup_select.py +++ b/test/scheduler/test_wakeup_select.py @@ -43,7 +43,9 @@ def elaborate(self, platform): class TestWakeupSelect(TestCaseWithSimulator): def setUp(self): self.gen_params = GenParams( - test_core_config.replace(func_units_config=tuple(RSBlockComponent([], rs_entries=16) for _ in range(2))) + test_core_config.replace( + func_units_config=tuple(RSBlockComponent([], rs_entries=16, rs_number=k) for k in range(2)) + ) ) self.m = WakeupTestCircuit(self.gen_params) self.cycles = 50 diff --git a/test/structs_common/test_rs.py b/test/structs_common/test_rs.py index 4e86a46de..c62852cb0 100644 --- a/test/structs_common/test_rs.py +++ b/test/structs_common/test_rs.py @@ -24,7 +24,7 @@ class TestRSMethodInsert(TestCaseWithSimulator): def test_insert(self): self.gen_params = GenParams(test_core_config) self.rs_entries_bits = self.gen_params.max_rs_entries_bits - self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None)) + self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None)) self.insert_list = [ { "rs_entry_id": id, @@ -69,7 +69,7 @@ class TestRSMethodSelect(TestCaseWithSimulator): def test_select(self): self.gen_params = GenParams(test_core_config) self.rs_entries_bits = self.gen_params.max_rs_entries_bits - self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None)) + self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None)) self.insert_list = [ { "rs_entry_id": id, @@ -132,7 +132,7 @@ class TestRSMethodUpdate(TestCaseWithSimulator): def test_update(self): self.gen_params = GenParams(test_core_config) self.rs_entries_bits = self.gen_params.max_rs_entries_bits - self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None)) + self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None)) self.insert_list = [ { "rs_entry_id": id, @@ -223,7 +223,7 @@ class TestRSMethodTake(TestCaseWithSimulator): def test_take(self): self.gen_params = GenParams(test_core_config) self.rs_entries_bits = self.gen_params.max_rs_entries_bits - self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None)) + self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None)) self.insert_list = [ { "rs_entry_id": id, @@ -322,7 +322,7 @@ class TestRSMethodGetReadyList(TestCaseWithSimulator): def test_get_ready_list(self): self.gen_params = GenParams(test_core_config) self.rs_entries_bits = self.gen_params.max_rs_entries_bits - self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, None)) + self.m = SimpleTestCircuit(RS(self.gen_params, 2**self.rs_entries_bits, 0, None)) self.insert_list = [ { "rs_entry_id": id, @@ -378,7 +378,7 @@ def test_two_get_ready_lists(self): self.rs_entries = self.gen_params.max_rs_entries self.rs_entries_bits = self.gen_params.max_rs_entries_bits self.m = SimpleTestCircuit( - RS(self.gen_params, 2**self.rs_entries_bits, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]]) + RS(self.gen_params, 2**self.rs_entries_bits, 0, [[OpType(1), OpType(2)], [OpType(3), OpType(4)]]) ) self.insert_list = [ { diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index 12acdfd27..c52e9aed4 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -237,14 +237,14 @@ def test_process(): (5, 5), ], ) -class TestLatencyMeasurer(TestCaseWithSimulator): +class TestFIFOLatencyMeasurer(TestCaseWithSimulator): slots_number: int expected_consumer_wait: float def test_latency_measurer(self): random.seed(42) - m = SimpleTestCircuit(LatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300)) + m = SimpleTestCircuit(FIFOLatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300)) DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True) latencies: list[int] = [] diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py index 2e706e0a3..82f6f0bfa 100644 --- a/transactron/lib/metrics.py +++ b/transactron/lib/metrics.py @@ -9,7 +9,7 @@ from transactron.utils import ValueLike from transactron import Method, def_method, TModule from transactron.utils import SignalBundle -from transactron.lib import FIFO +from transactron.lib import FIFO, AsyncMemoryBank from transactron.utils.dependencies import ListKey, DependencyContext, SimpleKey __all__ = [ @@ -18,7 +18,8 @@ "HwMetric", "HwCounter", "HwExpHistogram", - "LatencyMeasurer", + "FIFOLatencyMeasurer", + "IndexedLatencyMeasurer", "HardwareMetricsManager", "HwMetricsEnabledKey", ] @@ -354,7 +355,7 @@ def add(self, m: TModule, sample: Value): self._add(m, sample) -class LatencyMeasurer(Elaboratable): +class FIFOLatencyMeasurer(Elaboratable): """ Measures duration between two events, e.g. request processing latency. It can track multiple events at the same time, i.e. the second event can @@ -379,7 +380,7 @@ def __init__( The fully qualified name of the metric. description: str A human-readable description of the metric's functionality. - slots_number: str + slots_number: int A number of events that the module can track simultaneously. max_latency: int The maximum latency of an event. Used to set signal widths and @@ -473,6 +474,127 @@ def metrics_enabled(self) -> bool: return DependencyContext.get().get_dependency(HwMetricsEnabledKey()) +class IndexedLatencyMeasurer(Elaboratable): + """ + Measures duration between two events, e.g. request processing latency. + It can track multiple events at the same time, i.e. the second event can + be registered as started, before the first finishes. However, each event + needs to have an unique slot index. + + The module exposes an exponential histogram of the measured latencies. + """ + + def __init__( + self, + fully_qualified_name: str, + description: str = "", + *, + slots_number: int, + max_latency: int, + ): + """ + Parameters + ---------- + fully_qualified_name: str + The fully qualified name of the metric. + description: str + A human-readable description of the metric's functionality. + slots_number: int + A number of events that the module can track simultaneously. + max_latency: int + The maximum latency of an event. Used to set signal widths and + number of buckets in the histogram. If a latency turns to be + bigger than the maximum, it will overflow and result in a false + measurement. + """ + self.fully_qualified_name = fully_qualified_name + self.description = description + self.slots_number = slots_number + self.max_latency = max_latency + + self._start = Method(i=[("slot", range(0, slots_number))]) + self._stop = Method(i=[("slot", range(0, slots_number))]) + + # This bucket count gives us the best possible granularity. + bucket_count = bits_for(self.max_latency) + 1 + self.histogram = HwExpHistogram( + self.fully_qualified_name, + self.description, + bucket_count=bucket_count, + sample_width=bits_for(self.max_latency), + ) + + def elaborate(self, platform): + if not self.metrics_enabled(): + return TModule() + + m = TModule() + + epoch_width = bits_for(self.max_latency) + + m.submodules.slots = self.slots = AsyncMemoryBank( + data_layout=[("epoch", epoch_width)], elem_count=self.slots_number + ) + m.submodules.histogram = self.histogram + + epoch = Signal(epoch_width) + + m.d.sync += epoch.eq(epoch + 1) + + @def_method(m, self._start) + def _(slot): + self.slots.write(m, slot, epoch) + + @def_method(m, self._stop) + def _(slot): + ret = self.slots.read(m, slot) + # The result of substracting two unsigned n-bit is a signed (n+1)-bit value, + # so we need to cast the result and discard the most significant bit. + duration = (epoch - ret.epoch).as_unsigned()[:-1] + self.histogram.add(m, duration) + + return m + + def start(self, m: TModule, slot: ValueLike): + """ + Registers the start of an event. Can be called before the previous events + finish. If there are no slots available, the method will be blocked. + + Should be called in the body of either a transaction or a method. + + Parameters + ---------- + m: TModule + Transactron module + """ + + if not self.metrics_enabled(): + return + + self._start(m, slot) + + def stop(self, m: TModule, slot: ValueLike): + """ + Registers the end of the oldest event (the FIFO order). If there are no + started events in the queue, the method will block. + + Should be called in the body of either a transaction or a method. + + Parameters + ---------- + m: TModule + Transactron module + """ + + if not self.metrics_enabled(): + return + + self._stop(m, slot) + + def metrics_enabled(self) -> bool: + return DependencyContext.get().get_dependency(HwMetricsEnabledKey()) + + class HardwareMetricsManager: """ Collects all metrics registered in the circuit and provides an easy diff --git a/transactron/lib/storage.py b/transactron/lib/storage.py index e6d3e5cf5..a9be66020 100644 --- a/transactron/lib/storage.py +++ b/transactron/lib/storage.py @@ -8,7 +8,7 @@ from transactron.utils import assign, AssignType, LayoutList from .reqres import ArgumentsToResultsZipper -__all__ = ["MemoryBank"] +__all__ = ["MemoryBank", "AsyncMemoryBank"] class MemoryBank(Elaboratable): @@ -136,3 +136,78 @@ def _(arg): m.d.comb += assign(write_args, arg, fields=AssignType.ALL) return m + + +class AsyncMemoryBank(Elaboratable): + """AsyncMemoryBank module. + + Provides a transactional interface to asynchronous Amaranth Memory with one + read and one write port. It supports optionally writing with given granularity. + + Attributes + ---------- + read: Method + The read method. Accepts an `addr` from which data should be read. + The read response method. Return `data_layout` View which was saved on `addr` given by last + `read_req` method call. + write: Method + The write method. Accepts `addr` where data should be saved, `data` in form of `data_layout` + and optionally `mask` if `granularity` is not None. `1` in mask means that appropriate part should be written. + """ + + def __init__( + self, *, data_layout: LayoutList, elem_count: int, granularity: Optional[int] = None, src_loc: int | SrcLoc = 0 + ): + """ + Parameters + ---------- + data_layout: method layout + The format of structures stored in the Memory. + elem_count: int + Number of elements stored in Memory. + granularity: Optional[int] + Granularity of write, forwarded to Amaranth. If `None` the whole structure is always saved at once. + If not, the width of `data_layout` is split into `granularity` parts, which can be saved independently. + src_loc: int | SrcLoc + How many stack frames deep the source location is taken from. + Alternatively, the source location to use instead of the default. + """ + self.src_loc = get_src_loc(src_loc) + self.data_layout = make_layout(*data_layout) + self.elem_count = elem_count + self.granularity = granularity + self.width = from_method_layout(self.data_layout).size + self.addr_width = bits_for(self.elem_count - 1) + + self.read_req_layout: LayoutList = [("addr", self.addr_width)] + write_layout = [("addr", self.addr_width), ("data", self.data_layout)] + if self.granularity is not None: + write_layout.append(("mask", self.width // self.granularity)) + self.write_layout = make_layout(*write_layout) + + self.read = Method(i=self.read_req_layout, o=self.data_layout, src_loc=self.src_loc) + self.write = Method(i=self.write_layout, src_loc=self.src_loc) + + def elaborate(self, platform) -> TModule: + m = TModule() + + mem = Memory(width=self.width, depth=self.elem_count) + m.submodules.read_port = read_port = mem.read_port(domain="comb") + m.submodules.write_port = write_port = mem.write_port() + + @def_method(m, self.read) + def _(addr): + m.d.comb += read_port.addr.eq(addr) + m.d.comb += read_port.en.eq(1) + return read_port.data + + @def_method(m, self.write) + def _(arg): + m.d.comb += write_port.addr.eq(arg.addr) + m.d.comb += write_port.data.eq(arg.data) + if self.granularity is None: + m.d.comb += write_port.en.eq(1) + else: + m.d.comb += write_port.en.eq(arg.mask) + + return m From bbfa39cb37b9856039a6e9b09435ebc22b62879b Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Wed, 27 Mar 2024 16:42:40 +0100 Subject: [PATCH 04/14] Fix errors --- transactron/lib/metrics.py | 4 ++-- transactron/lib/storage.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py index 82f6f0bfa..d52377ba6 100644 --- a/transactron/lib/metrics.py +++ b/transactron/lib/metrics.py @@ -543,11 +543,11 @@ def elaborate(self, platform): @def_method(m, self._start) def _(slot): - self.slots.write(m, slot, epoch) + self.slots.write(m, addr=slot, data=epoch) @def_method(m, self._stop) def _(slot): - ret = self.slots.read(m, slot) + ret = self.slots.read(m, addr=slot) # The result of substracting two unsigned n-bit is a signed (n+1)-bit value, # so we need to cast the result and discard the most significant bit. duration = (epoch - ret.epoch).as_unsigned()[:-1] diff --git a/transactron/lib/storage.py b/transactron/lib/storage.py index a9be66020..3bbf07624 100644 --- a/transactron/lib/storage.py +++ b/transactron/lib/storage.py @@ -198,7 +198,6 @@ def elaborate(self, platform) -> TModule: @def_method(m, self.read) def _(addr): m.d.comb += read_port.addr.eq(addr) - m.d.comb += read_port.en.eq(1) return read_port.data @def_method(m, self.write) From 74084719f9ca08ec2afc8e5587537a640a9a018a Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Thu, 28 Mar 2024 11:11:53 +0100 Subject: [PATCH 05/14] Documentation --- transactron/lib/metrics.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py index d52377ba6..816225765 100644 --- a/transactron/lib/metrics.py +++ b/transactron/lib/metrics.py @@ -555,10 +555,9 @@ def _(slot): return m - def start(self, m: TModule, slot: ValueLike): + def start(self, m: TModule, *, slot: ValueLike): """ - Registers the start of an event. Can be called before the previous events - finish. If there are no slots available, the method will be blocked. + Registers the start of an event for a given slot index. Should be called in the body of either a transaction or a method. @@ -566,6 +565,8 @@ def start(self, m: TModule, slot: ValueLike): ---------- m: TModule Transactron module + slot: ValueLike + The slot index of the event. """ if not self.metrics_enabled(): @@ -573,10 +574,9 @@ def start(self, m: TModule, slot: ValueLike): self._start(m, slot) - def stop(self, m: TModule, slot: ValueLike): + def stop(self, m: TModule, *, slot: ValueLike): """ - Registers the end of the oldest event (the FIFO order). If there are no - started events in the queue, the method will block. + Registers the end of the event for a given slot index. Should be called in the body of either a transaction or a method. @@ -584,6 +584,8 @@ def stop(self, m: TModule, slot: ValueLike): ---------- m: TModule Transactron module + slot: ValueLike + The slot index of the event. """ if not self.metrics_enabled(): From 8363f21f5f6469367d1797e88c075d0bd3e2b241 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Thu, 28 Mar 2024 11:34:23 +0100 Subject: [PATCH 06/14] Test for IndexedLatencyMeasurer --- test/transactron/test_metrics.py | 99 ++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index c52e9aed4..5eaac6066 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -305,6 +305,105 @@ def consumer(): sim.add_sync_process(ticker) +@parameterized_class( + ("slots_number", "expected_consumer_wait"), + [ + (2, 5), + (2, 10), + (5, 10), + (10, 1), + (10, 10), + (5, 5), + ], +) +class TestIndexedLatencyMeasurer(TestCaseWithSimulator): + slots_number: int + expected_consumer_wait: float + + def test_latency_measurer(self): + random.seed(42) + + m = SimpleTestCircuit(IndexedLatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300)) + DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True) + + latencies: list[int] = [] + + events = list(0 for _ in range(self.slots_number)) + free_slots = list(k for k in range(self.slots_number)) + used_slots: list[int] = [] + + time = 0 + + def ticker(): + nonlocal time + + yield Passive() + + while True: + yield + time += 1 + + finish = False + + def producer(): + nonlocal finish + + for _ in range(200): + if not free_slots: + yield + continue + + slot_id = random.choice(free_slots) + yield from m._start.call(slot=slot_id) + + # Make sure that the time is updated first. + yield Settle() + + events[slot_id] = time + free_slots.remove(slot_id) + used_slots.append(slot_id) + + yield from self.random_wait_geom(0.8) + + finish = True + + def consumer(): + while not finish: + if not used_slots: + yield + continue + + slot_id = random.choice(used_slots) + + yield from m._stop.call(slot=slot_id) + + # Make sure that the time is updated first. + yield Settle() + + latencies.append(time - events[slot_id]) + used_slots.remove(slot_id) + free_slots.append(slot_id) + + yield from self.random_wait_geom(1.0 / self.expected_consumer_wait) + + self.assertEqual(min(latencies), (yield m._dut.histogram.min.value)) + self.assertEqual(max(latencies), (yield m._dut.histogram.max.value)) + self.assertEqual(sum(latencies), (yield m._dut.histogram.sum.value)) + self.assertEqual(len(latencies), (yield m._dut.histogram.count.value)) + + for i in range(m._dut.histogram.bucket_count): + bucket_start = 0 if i == 0 else 2 ** (i - 1) + bucket_end = 1e10 if i == m._dut.histogram.bucket_count - 1 else 2**i + + count = sum(1 for x in latencies if bucket_start <= x < bucket_end) + self.assertEqual(count, (yield m._dut.histogram.buckets[i].value)) + + with self.run_simulation(m) as sim: + sim.add_sync_process(producer) + sim.add_sync_process(consumer) + sim.add_sync_process(ticker) + + class MetricManagerTestCircuit(Elaboratable): def __init__(self): self.incr_counters = Method(i=[("counter1", 1), ("counter2", 1), ("counter3", 1)]) From 18119abda9fda8c16f0df557c7c4645d8a9490c8 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Thu, 28 Mar 2024 12:01:46 +0100 Subject: [PATCH 07/14] Test for AsyncMemoryBank --- test/transactions/test_transaction_lib.py | 45 +++++++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/test/transactions/test_transaction_lib.py b/test/transactions/test_transaction_lib.py index c8e758ce7..78119067f 100644 --- a/test/transactions/test_transaction_lib.py +++ b/test/transactions/test_transaction_lib.py @@ -142,7 +142,7 @@ def test_mem(self, max_addr, writer_rand, reader_req_rand, reader_resp_rand, see MemoryBank(data_layout=[("data", data_width)], elem_count=max_addr, safe_writes=safe_writes) ) - data_dict: dict[int, int] = dict((i, 0) for i in range(max_addr)) + data: list[int] = list(0 for _ in range(max_addr)) read_req_queue = deque() addr_queue = deque() @@ -155,7 +155,7 @@ def writer(): yield from m.write.call(data=d, addr=a) for _ in range(2): yield Settle() - data_dict[a] = d + data[a] = d yield from self.random_wait(writer_rand, min_cycle_cnt=1) def reader_req(): @@ -165,7 +165,7 @@ def reader_req(): for _ in range(1): yield Settle() if safe_writes: - d = data_dict[a] + d = data[a] read_req_queue.append(d) else: addr_queue.append((cycle, a)) @@ -188,7 +188,7 @@ def internal_reader_resp(): else: yield continue - d = data_dict[a] + d = data[a] # check when internal method has been run to capture # memory state for tests purposes if (yield m._dut._internal_read_resp_trans.grant): @@ -232,6 +232,43 @@ def process(): sim.add_sync_process(process) +class TestAsyncMemoryBank(TestCaseWithSimulator): + @parameterized.expand([(9, 3, 3, 14), (16, 1, 1, 15), (16, 1, 1, 16), (12, 3, 1, 17)]) + def test_mem(self, max_addr, writer_rand, reader_rand, seed): + test_count = 200 + + data_width = 6 + m = SimpleTestCircuit(AsyncMemoryBank(data_layout=[("data", data_width)], elem_count=max_addr)) + + data: list[int] = list(0 for i in range(max_addr)) + + random.seed(seed) + + def writer(): + for cycle in range(test_count): + d = random.randrange(2**data_width) + a = random.randrange(max_addr) + yield from m.write.call(data=d, addr=a) + for _ in range(2): + yield Settle() + data[a] = d + yield from self.random_wait(writer_rand, min_cycle_cnt=1) + + def reader(): + for cycle in range(test_count): + a = random.randrange(max_addr) + d = yield from m.read.call(addr=a) + for _ in range(1): + yield Settle() + expected_d = data[a] + self.assertEqual(d["data"], expected_d) + yield from self.random_wait(reader_rand, min_cycle_cnt=1) + + with self.run_simulation(m) as sim: + sim.add_sync_process(reader) + sim.add_sync_process(writer) + + class ManyToOneConnectTransTestCircuit(Elaboratable): def __init__(self, count: int, lay: MethodLayout): self.count = count From 472adfbb45c3a7f45e08ee0d888d8e6185c6e7e1 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Thu, 28 Mar 2024 17:58:03 +0100 Subject: [PATCH 08/14] Address review comments --- coreblocks/core_structs/rf.py | 7 ++----- coreblocks/func_blocks/fu/common/rs.py | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/coreblocks/core_structs/rf.py b/coreblocks/core_structs/rf.py index d6bed0c9f..e8c4e4ef3 100644 --- a/coreblocks/core_structs/rf.py +++ b/coreblocks/core_structs/rf.py @@ -1,10 +1,9 @@ -import operator from amaranth import * -from functools import reduce from transactron import Method, Transaction, def_method, TModule from coreblocks.interface.layouts import RFLayouts from coreblocks.params import GenParams from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer +from transactron.utils.amaranth_ext.functions import popcount from transactron.utils.transactron_helpers import make_layout __all__ = ["RegisterFile"] @@ -85,9 +84,7 @@ def _(reg_id: Value): if self.perf_num_valid.metrics_enabled(): num_valid = Signal(self.gen_params.phys_regs_bits + 1) m.d.comb += num_valid.eq( - reduce( - operator.add, (self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits)) - ) + popcount(Cat(self.entries[reg_id].valid for reg_id in range(2**self.gen_params.phys_regs_bits))) ) with Transaction(name="perf").body(m): self.perf_num_valid.add(m, num_valid) diff --git a/coreblocks/func_blocks/fu/common/rs.py b/coreblocks/func_blocks/fu/common/rs.py index 3c05c59f6..6c418f226 100644 --- a/coreblocks/func_blocks/fu/common/rs.py +++ b/coreblocks/func_blocks/fu/common/rs.py @@ -1,5 +1,3 @@ -import operator -from functools import reduce from collections.abc import Iterable from typing import Optional from amaranth import * @@ -10,6 +8,7 @@ from coreblocks.interface.layouts import RSLayouts from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer from transactron.utils import RecordDict +from transactron.utils.amaranth_ext.functions import popcount from transactron.utils.transactron_helpers import make_layout __all__ = ["RS"] @@ -130,9 +129,7 @@ def _() -> RecordDict: if self.perf_num_full.metrics_enabled(): num_full = Signal(self.rs_entries_bits + 1) - m.d.comb += num_full.eq( - reduce(operator.add, (self.data[entry_id].rec_full for entry_id in range(self.rs_entries))) - ) + m.d.comb += num_full.eq(popcount(Cat(self.data[entry_id].rec_full for entry_id in range(self.rs_entries)))) with Transaction(name="perf").body(m): self.perf_num_full.add(m, num_full) From 15cf0fefb9a9a2c0a4eff265f7ff136cfe77c84c Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Sun, 31 Mar 2024 16:00:20 +0200 Subject: [PATCH 09/14] Automatic generation of RS numbers --- coreblocks/func_blocks/csr/csr.py | 1 + coreblocks/func_blocks/fu/common/rs_func_block.py | 2 +- coreblocks/func_blocks/lsu/dummyLsu.py | 2 ++ coreblocks/params/configurations.py | 13 ++++++++----- coreblocks/params/fu_params.py | 2 ++ 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/coreblocks/func_blocks/csr/csr.py b/coreblocks/func_blocks/csr/csr.py index 43ddfe957..697de5c63 100644 --- a/coreblocks/func_blocks/csr/csr.py +++ b/coreblocks/func_blocks/csr/csr.py @@ -236,6 +236,7 @@ def _(rob_id: Value, side_fx: Value): return m +@dataclass(frozen=True) class CSRBlockComponent(BlockComponentParams): def get_module(self, gen_params: GenParams) -> FuncBlock: connections = gen_params.get(DependencyManager) diff --git a/coreblocks/func_blocks/fu/common/rs_func_block.py b/coreblocks/func_blocks/fu/common/rs_func_block.py index 6345caf6b..35801dc12 100644 --- a/coreblocks/func_blocks/fu/common/rs_func_block.py +++ b/coreblocks/func_blocks/fu/common/rs_func_block.py @@ -93,7 +93,7 @@ def elaborate(self, platform): class RSBlockComponent(BlockComponentParams): func_units: Collection[FunctionalComponentParams] rs_entries: int - rs_number: int + rs_number: int = -1 # overwritten by CoreConfiguration def get_module(self, gen_params: GenParams) -> FuncBlock: modules = list((u.get_module(gen_params), u.get_optypes()) for u in self.func_units) diff --git a/coreblocks/func_blocks/lsu/dummyLsu.py b/coreblocks/func_blocks/lsu/dummyLsu.py index ccda62e32..08a5d8604 100644 --- a/coreblocks/func_blocks/lsu/dummyLsu.py +++ b/coreblocks/func_blocks/lsu/dummyLsu.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from amaranth import * from amaranth.lib.data import View @@ -320,6 +321,7 @@ def _(rob_id: Value, side_fx: Value): return m +@dataclass(frozen=True) class LSUBlockComponent(BlockComponentParams): def get_module(self, gen_params: GenParams) -> FuncBlock: connections = gen_params.get(DependencyManager) diff --git a/coreblocks/params/configurations.py b/coreblocks/params/configurations.py index b7fb53173..c8dd6810c 100644 --- a/coreblocks/params/configurations.py +++ b/coreblocks/params/configurations.py @@ -26,7 +26,6 @@ RSBlockComponent( [ALUComponent(), ShiftUnitComponent(), JumpComponent(), ExceptionUnitComponent(), PrivilegedUnitComponent()], rs_entries=4, - rs_number=0, ), LSUBlockComponent(), CSRBlockComponent(), @@ -73,6 +72,12 @@ class CoreConfiguration: Definitions of PMAs per contiguous segments of memory. """ + def __post_init__(self): + self.func_units_config = [ + dataclasses.replace(conf, rs_number=k) if hasattr(conf, "rs_number") else conf + for k, conf in enumerate(self.func_units_config) + ] + xlen: int = 32 func_units_config: Collection[BlockComponentParams] = basic_configuration @@ -107,7 +112,7 @@ def replace(self, **kwargs): tiny_core_config = CoreConfiguration( embedded=True, func_units_config=( - RSBlockComponent([ALUComponent(), ShiftUnitComponent(), JumpComponent()], rs_entries=2, rs_number=0), + RSBlockComponent([ALUComponent(), ShiftUnitComponent(), JumpComponent()], rs_entries=2), LSUBlockComponent(), ), phys_regs_bits=basic_core_config.phys_regs_bits - 1, @@ -129,7 +134,6 @@ def replace(self, **kwargs): PrivilegedUnitComponent(), ], rs_entries=4, - rs_number=0, ), RSBlockComponent( [ @@ -137,7 +141,6 @@ def replace(self, **kwargs): DivComponent(), ], rs_entries=2, - rs_number=1, ), LSUBlockComponent(), CSRBlockComponent(), @@ -147,7 +150,7 @@ def replace(self, **kwargs): # Core configuration used in internal testbenches test_core_config = CoreConfiguration( - func_units_config=tuple(RSBlockComponent([], rs_entries=4, rs_number=k) for k in range(2)), + func_units_config=tuple(RSBlockComponent([], rs_entries=4) for _ in range(2)), rob_entries_bits=7, phys_regs_bits=7, _implied_extensions=Extension.I, diff --git a/coreblocks/params/fu_params.py b/coreblocks/params/fu_params.py index 297e9e9fc..4884d7c9f 100644 --- a/coreblocks/params/fu_params.py +++ b/coreblocks/params/fu_params.py @@ -1,4 +1,5 @@ from abc import abstractmethod, ABC +from dataclasses import dataclass from collections.abc import Collection, Iterable from coreblocks.func_blocks.interface.func_protocols import FuncBlock, FuncUnit @@ -20,6 +21,7 @@ ] +@dataclass(frozen=True) class BlockComponentParams(ABC): @abstractmethod def get_module(self, gen_params: "GenParams") -> FuncBlock: From f9313515db9bd68b7334611fd869f7b203adfa45 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Sun, 31 Mar 2024 16:17:26 +0200 Subject: [PATCH 10/14] Use Now(), increase number of tests, fixes --- test/transactron/test_metrics.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index 5eaac6066..c197c5605 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -9,6 +9,7 @@ from transactron.lib.metrics import * from transactron import * from transactron.testing import TestCaseWithSimulator, data_layout, SimpleTestCircuit +from transactron.testing.infrastructure import Now from transactron.utils.dependencies import DependencyContext @@ -332,32 +333,21 @@ def test_latency_measurer(self): free_slots = list(k for k in range(self.slots_number)) used_slots: list[int] = [] - time = 0 - - def ticker(): - nonlocal time - - yield Passive() - - while True: - yield - time += 1 - finish = False def producer(): nonlocal finish for _ in range(200): - if not free_slots: + while not free_slots: yield continue + yield Settle() slot_id = random.choice(free_slots) yield from m._start.call(slot=slot_id) - # Make sure that the time is updated first. - yield Settle() + time = (yield Now()) events[slot_id] = time free_slots.remove(slot_id) @@ -369,7 +359,7 @@ def producer(): def consumer(): while not finish: - if not used_slots: + while not used_slots: yield continue @@ -377,7 +367,9 @@ def consumer(): yield from m._stop.call(slot=slot_id) - # Make sure that the time is updated first. + time = (yield Now()) + + yield Settle() yield Settle() latencies.append(time - events[slot_id]) @@ -401,7 +393,6 @@ def consumer(): with self.run_simulation(m) as sim: sim.add_sync_process(producer) sim.add_sync_process(consumer) - sim.add_sync_process(ticker) class MetricManagerTestCircuit(Elaboratable): From 2971e307ae2764e7530d16dd4a0a4cca4bb04579 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Sun, 31 Mar 2024 16:24:43 +0200 Subject: [PATCH 11/14] Use Now() in another test --- test/transactron/test_metrics.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index c197c5605..16d1cdf1e 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -252,17 +252,6 @@ def test_latency_measurer(self): event_queue = queue.Queue() - time = 0 - - def ticker(): - nonlocal time - - yield Passive() - - while True: - yield - time += 1 - finish = False def producer(): @@ -273,6 +262,7 @@ def producer(): # Make sure that the time is updated first. yield Settle() + time = (yield Now()) event_queue.put(time) yield from self.random_wait_geom(0.8) @@ -284,6 +274,7 @@ def consumer(): # Make sure that the time is updated first. yield Settle() + time = (yield Now()) latencies.append(time - event_queue.get()) yield from self.random_wait_geom(1.0 / self.expected_consumer_wait) @@ -303,7 +294,6 @@ def consumer(): with self.run_simulation(m) as sim: sim.add_sync_process(producer) sim.add_sync_process(consumer) - sim.add_sync_process(ticker) @parameterized_class( From 463f29f94c301758a3f5d56a5167af3316462ac6 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Sun, 31 Mar 2024 17:12:23 +0200 Subject: [PATCH 12/14] LatencyMeasurer test refactor --- test/transactron/test_metrics.py | 53 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index 16d1cdf1e..7005bc41a 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -4,7 +4,7 @@ from parameterized import parameterized_class from amaranth import * -from amaranth.sim import Passive, Settle +from amaranth.sim import Settle from transactron.lib.metrics import * from transactron import * @@ -227,6 +227,21 @@ def test_process(): sim.add_sync_process(test_process) +class TestLatencyMeasurerBase(TestCaseWithSimulator): + def check_latencies(self, m: SimpleTestCircuit, latencies: list[int]): + self.assertEqual(min(latencies), (yield m._dut.histogram.min.value)) + self.assertEqual(max(latencies), (yield m._dut.histogram.max.value)) + self.assertEqual(sum(latencies), (yield m._dut.histogram.sum.value)) + self.assertEqual(len(latencies), (yield m._dut.histogram.count.value)) + + for i in range(m._dut.histogram.bucket_count): + bucket_start = 0 if i == 0 else 2 ** (i - 1) + bucket_end = 1e10 if i == m._dut.histogram.bucket_count - 1 else 2**i + + count = sum(1 for x in latencies if bucket_start <= x < bucket_end) + self.assertEqual(count, (yield m._dut.histogram.buckets[i].value)) + + @parameterized_class( ("slots_number", "expected_consumer_wait"), [ @@ -238,7 +253,7 @@ def test_process(): (5, 5), ], ) -class TestFIFOLatencyMeasurer(TestCaseWithSimulator): +class TestFIFOLatencyMeasurer(TestLatencyMeasurerBase): slots_number: int expected_consumer_wait: float @@ -262,7 +277,7 @@ def producer(): # Make sure that the time is updated first. yield Settle() - time = (yield Now()) + time = yield Now() event_queue.put(time) yield from self.random_wait_geom(0.8) @@ -274,22 +289,12 @@ def consumer(): # Make sure that the time is updated first. yield Settle() - time = (yield Now()) + time = yield Now() latencies.append(time - event_queue.get()) yield from self.random_wait_geom(1.0 / self.expected_consumer_wait) - self.assertEqual(min(latencies), (yield m._dut.histogram.min.value)) - self.assertEqual(max(latencies), (yield m._dut.histogram.max.value)) - self.assertEqual(sum(latencies), (yield m._dut.histogram.sum.value)) - self.assertEqual(len(latencies), (yield m._dut.histogram.count.value)) - - for i in range(m._dut.histogram.bucket_count): - bucket_start = 0 if i == 0 else 2 ** (i - 1) - bucket_end = 1e10 if i == m._dut.histogram.bucket_count - 1 else 2**i - - count = sum(1 for x in latencies if bucket_start <= x < bucket_end) - self.assertEqual(count, (yield m._dut.histogram.buckets[i].value)) + self.check_latencies(m, latencies) with self.run_simulation(m) as sim: sim.add_sync_process(producer) @@ -307,7 +312,7 @@ def consumer(): (5, 5), ], ) -class TestIndexedLatencyMeasurer(TestCaseWithSimulator): +class TestIndexedLatencyMeasurer(TestLatencyMeasurerBase): slots_number: int expected_consumer_wait: float @@ -337,7 +342,7 @@ def producer(): slot_id = random.choice(free_slots) yield from m._start.call(slot=slot_id) - time = (yield Now()) + time = yield Now() events[slot_id] = time free_slots.remove(slot_id) @@ -357,7 +362,7 @@ def consumer(): yield from m._stop.call(slot=slot_id) - time = (yield Now()) + time = yield Now() yield Settle() yield Settle() @@ -368,17 +373,7 @@ def consumer(): yield from self.random_wait_geom(1.0 / self.expected_consumer_wait) - self.assertEqual(min(latencies), (yield m._dut.histogram.min.value)) - self.assertEqual(max(latencies), (yield m._dut.histogram.max.value)) - self.assertEqual(sum(latencies), (yield m._dut.histogram.sum.value)) - self.assertEqual(len(latencies), (yield m._dut.histogram.count.value)) - - for i in range(m._dut.histogram.bucket_count): - bucket_start = 0 if i == 0 else 2 ** (i - 1) - bucket_end = 1e10 if i == m._dut.histogram.bucket_count - 1 else 2**i - - count = sum(1 for x in latencies if bucket_start <= x < bucket_end) - self.assertEqual(count, (yield m._dut.histogram.buckets[i].value)) + self.check_latencies(m, latencies) with self.run_simulation(m) as sim: sim.add_sync_process(producer) From 110d596f35e29d6d804ab78dfa9870c303492494 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Mon, 1 Apr 2024 14:30:09 +0200 Subject: [PATCH 13/14] Add assertions to IndexedLatencyMeasurer --- transactron/lib/metrics.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py index 816225765..68f7de00e 100644 --- a/transactron/lib/metrics.py +++ b/transactron/lib/metrics.py @@ -9,7 +9,7 @@ from transactron.utils import ValueLike from transactron import Method, def_method, TModule from transactron.utils import SignalBundle -from transactron.lib import FIFO, AsyncMemoryBank +from transactron.lib import FIFO, AsyncMemoryBank, logging from transactron.utils.dependencies import ListKey, DependencyContext, SimpleKey __all__ = [ @@ -524,6 +524,8 @@ def __init__( sample_width=bits_for(self.max_latency), ) + self.log = logging.HardwareLogger(fully_qualified_name) + def elaborate(self, platform): if not self.metrics_enabled(): return TModule() @@ -537,16 +539,28 @@ def elaborate(self, platform): ) m.submodules.histogram = self.histogram + slots_taken = Signal(self.slots_number) + slots_taken_start = Signal.like(slots_taken) + slots_taken_stop = Signal.like(slots_taken) + + m.d.comb += slots_taken_start.eq(slots_taken) + m.d.comb += slots_taken_stop.eq(slots_taken_start) + m.d.sync += slots_taken.eq(slots_taken_stop) + epoch = Signal(epoch_width) m.d.sync += epoch.eq(epoch + 1) @def_method(m, self._start) - def _(slot): + def _(slot: Value): + m.d.comb += slots_taken_start.eq(slots_taken | (1 << slot)) + self.log.error(m, (slots_taken & (1 << slot)).any(), "taken slot {} taken again", slot) self.slots.write(m, addr=slot, data=epoch) @def_method(m, self._stop) - def _(slot): + def _(slot: Value): + m.d.comb += slots_taken_stop.eq(slots_taken_start & ~(C(1, self.slots_number) << slot)) + self.log.error(m, ~(slots_taken & (1 << slot)).any(), "free slot {} freed again", slot) ret = self.slots.read(m, addr=slot) # The result of substracting two unsigned n-bit is a signed (n+1)-bit value, # so we need to cast the result and discard the most significant bit. From a3189030eff5f8b66ab36ebc88c76c8e1fbab1f9 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Mon, 1 Apr 2024 14:31:31 +0200 Subject: [PATCH 14/14] Change Indexed to Tagged --- coreblocks/core_structs/rf.py | 4 ++-- coreblocks/func_blocks/fu/common/rs.py | 4 ++-- test/transactron/test_metrics.py | 2 +- transactron/lib/metrics.py | 14 +++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/coreblocks/core_structs/rf.py b/coreblocks/core_structs/rf.py index e8c4e4ef3..d6d5e76e8 100644 --- a/coreblocks/core_structs/rf.py +++ b/coreblocks/core_structs/rf.py @@ -2,7 +2,7 @@ from transactron import Method, Transaction, def_method, TModule from coreblocks.interface.layouts import RFLayouts from coreblocks.params import GenParams -from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer +from transactron.lib.metrics import HwExpHistogram, TaggedLatencyMeasurer from transactron.utils.amaranth_ext.functions import popcount from transactron.utils.transactron_helpers import make_layout @@ -22,7 +22,7 @@ def __init__(self, *, gen_params: GenParams): self.write = Method(i=layouts.rf_write) self.free = Method(i=layouts.rf_free) - self.perf_rf_valid_time = IndexedLatencyMeasurer( + self.perf_rf_valid_time = TaggedLatencyMeasurer( "struct.rf.valid_time", description="Distribution of time registers are valid in RF", slots_number=2**gen_params.phys_regs_bits, diff --git a/coreblocks/func_blocks/fu/common/rs.py b/coreblocks/func_blocks/fu/common/rs.py index 6c418f226..1911690b4 100644 --- a/coreblocks/func_blocks/fu/common/rs.py +++ b/coreblocks/func_blocks/fu/common/rs.py @@ -6,7 +6,7 @@ from coreblocks.params import GenParams from coreblocks.frontend.decoder import OpType from coreblocks.interface.layouts import RSLayouts -from transactron.lib.metrics import HwExpHistogram, IndexedLatencyMeasurer +from transactron.lib.metrics import HwExpHistogram, TaggedLatencyMeasurer from transactron.utils import RecordDict from transactron.utils.amaranth_ext.functions import popcount from transactron.utils.transactron_helpers import make_layout @@ -44,7 +44,7 @@ def __init__( self.data = Array(Signal(self.internal_layout) for _ in range(self.rs_entries)) self.data_ready = Signal(self.rs_entries) - self.perf_rs_wait_time = IndexedLatencyMeasurer( + self.perf_rs_wait_time = TaggedLatencyMeasurer( f"fu.block_{rs_number}.rs.valid_time", description=f"Distribution of time instructions wait in RS {rs_number}", slots_number=2**self.rs_entries_bits, diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py index 7005bc41a..6b0e4f738 100644 --- a/test/transactron/test_metrics.py +++ b/test/transactron/test_metrics.py @@ -319,7 +319,7 @@ class TestIndexedLatencyMeasurer(TestLatencyMeasurerBase): def test_latency_measurer(self): random.seed(42) - m = SimpleTestCircuit(IndexedLatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300)) + m = SimpleTestCircuit(TaggedLatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300)) DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True) latencies: list[int] = [] diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py index 68f7de00e..b7e36a86c 100644 --- a/transactron/lib/metrics.py +++ b/transactron/lib/metrics.py @@ -19,7 +19,7 @@ "HwCounter", "HwExpHistogram", "FIFOLatencyMeasurer", - "IndexedLatencyMeasurer", + "TaggedLatencyMeasurer", "HardwareMetricsManager", "HwMetricsEnabledKey", ] @@ -474,12 +474,12 @@ def metrics_enabled(self) -> bool: return DependencyContext.get().get_dependency(HwMetricsEnabledKey()) -class IndexedLatencyMeasurer(Elaboratable): +class TaggedLatencyMeasurer(Elaboratable): """ Measures duration between two events, e.g. request processing latency. It can track multiple events at the same time, i.e. the second event can be registered as started, before the first finishes. However, each event - needs to have an unique slot index. + needs to have an unique slot tag. The module exposes an exponential histogram of the measured latencies. """ @@ -571,7 +571,7 @@ def _(slot: Value): def start(self, m: TModule, *, slot: ValueLike): """ - Registers the start of an event for a given slot index. + Registers the start of an event for a given slot tag. Should be called in the body of either a transaction or a method. @@ -580,7 +580,7 @@ def start(self, m: TModule, *, slot: ValueLike): m: TModule Transactron module slot: ValueLike - The slot index of the event. + The slot tag of the event. """ if not self.metrics_enabled(): @@ -590,7 +590,7 @@ def start(self, m: TModule, *, slot: ValueLike): def stop(self, m: TModule, *, slot: ValueLike): """ - Registers the end of the event for a given slot index. + Registers the end of the event for a given slot tag. Should be called in the body of either a transaction or a method. @@ -599,7 +599,7 @@ def stop(self, m: TModule, *, slot: ValueLike): m: TModule Transactron module slot: ValueLike - The slot index of the event. + The slot tag of the event. """ if not self.metrics_enabled():