From 5f51173cfd8737aa479b4055aea5a95ec033348c Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Thu, 1 Feb 2024 15:12:24 +0100
Subject: [PATCH 1/9] Add hardware metrics modules

---
 coreblocks/params/configurations.py     |   4 +
 coreblocks/params/genparams.py          |   2 +
 coreblocks/structs_common/hw_metrics.py | 545 ++++++++++++++++++++++++
 test/structs_common/test_hw_metrics.py  | 305 +++++++++++++
 4 files changed, 856 insertions(+)
 create mode 100644 coreblocks/structs_common/hw_metrics.py
 create mode 100644 test/structs_common/test_hw_metrics.py

diff --git a/coreblocks/params/configurations.py b/coreblocks/params/configurations.py
index a289c09e5..d8c1b0740 100644
--- a/coreblocks/params/configurations.py
+++ b/coreblocks/params/configurations.py
@@ -48,6 +48,8 @@ class CoreConfiguration:
         Enables 16-bit Compressed Instructions extension.
     embedded: bool
         Enables Reduced Integer (E) extension.
+    hardware_metrics: bool
+        Enable hardware metrics. If disabled, metrics will not be synthesized.
     phys_regs_bits: int
         Size of the Physical Register File is 2**phys_regs_bits.
     rob_entries_bits: int
@@ -76,6 +78,8 @@ class CoreConfiguration:
     compressed: bool = False
     embedded: bool = False
 
+    hardware_metrics: bool = True
+
     phys_regs_bits: int = 6
     rob_entries_bits: int = 7
     start_pc: int = 0
diff --git a/coreblocks/params/genparams.py b/coreblocks/params/genparams.py
index 916832b49..a6205a128 100644
--- a/coreblocks/params/genparams.py
+++ b/coreblocks/params/genparams.py
@@ -47,6 +47,8 @@ def __init__(self, cfg: CoreConfiguration):
             block_size_bits=cfg.icache_block_size_bits,
         )
 
+        self.hardware_metrics_enabled = cfg.hardware_metrics
+
         # Verification temporally disabled
         # if not optypes_required_by_extensions(self.isa.extensions) <= optypes_supported(func_units_config):
         #     raise Exception(f"Functional unit configuration fo not support all extension required by{isa_str}")
diff --git a/coreblocks/structs_common/hw_metrics.py b/coreblocks/structs_common/hw_metrics.py
new file mode 100644
index 000000000..659bc9e38
--- /dev/null
+++ b/coreblocks/structs_common/hw_metrics.py
@@ -0,0 +1,545 @@
+from dataclasses import dataclass, field
+from dataclasses_json import dataclass_json
+from typing import Optional
+from abc import ABC
+
+from amaranth import *
+from amaranth.utils import bits_for
+
+from transactron.utils import ValueLike
+from transactron import Method, def_method, TModule
+from transactron.utils import SignalBundle
+from transactron.lib import FIFO
+from coreblocks.params.genparams import GenParams
+from transactron.utils.dependencies import DependencyManager, ListKey
+
+
+@dataclass_json
+@dataclass(frozen=True)
+class MetricRegisterModel:
+    """
+    Represents a single register of a core metric, serving as a fundamental
+    building block that holds a singular value.
+
+    Attributes
+    ----------
+    name: str
+        The unique identifier for the register (among remaning
+        registers of a specific metric).
+    description: str
+        A brief description of the metric's purpose.
+    width: int
+        The bit-width of the register.
+    """
+
+    name: str
+    description: str
+    width: int
+
+
+@dataclass_json
+@dataclass
+class MetricModel:
+    """
+    Provides information about a metric exposed by the core. Each metric
+    comprises multiple registers, each dedicated to storing specific values.
+
+    The configuration of registers is internally determined by a
+    specific metric typ eand is not user-configurable.
+
+    Attributes
+    ----------
+    fully_qualified_name: str
+        The fully qualified name of the metric, with name components joined by dots ('.'),
+        e.g., 'foo.bar.requests'.
+    description: str
+        A human-readable description of the metric's functionality.
+    regs: list[MetricRegisterModel]
+        A list of registers associated with the metric.
+    """
+
+    fully_qualified_name: str
+    description: str
+    regs: dict[str, MetricRegisterModel] = field(default_factory=dict)
+
+
+class HwMetricRegister(MetricRegisterModel):
+    """
+    A concrete implementation of a metric register that holds its value as Amaranth signal.
+
+    Attributes
+    ----------
+    value: Signal
+        Amaranth signal representing the value of the register.
+    """
+
+    def __init__(self, name: str, width_bits: int, description: str = "", reset: int = 0):
+        """
+        Parameters
+        ----------
+        name: str
+            The unique identifier for the register (among remaning
+            registers of a specific metric).
+        width: int
+            The bit-width of the register.
+        description: str
+            A brief description of the metric's purpose.
+        reset: int
+            The reset value of the register.
+        """
+        super().__init__(name, description, width_bits)
+
+        self.value = Signal(width_bits, reset=reset, name=name)
+
+
+@dataclass(frozen=True)
+class HwMetricsListKey(ListKey["HwMetric"]):
+    """DependencyManager key collecting hardware metrics globally as a list."""
+
+    # This key is defined here, because it is only used internally by HwMetric and HardwareMetricsManager
+    pass
+
+
+class HwMetric(ABC, MetricModel):
+    """
+    A base for all metric implementations. It should be only used for declaring
+    new types of metrics.
+
+    It takes care of registering the metric in the dependency manager.
+
+    Attributes
+    ----------
+    gen_params: GenParams
+        Core generation parameters.
+    signals: dict[str, Signal]
+        A mapping from a register name to a Signal containing the value of that register.
+    """
+
+    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str):
+        """
+        Parameters
+        ----------
+        gen_params: GenParams
+            Core generation parameters.
+        fully_qualified_name: str
+            The fully qualified name of the metric.
+        description: str
+            A human-readable description of the metric's functionality.
+        """
+        super().__init__(fully_qualified_name, description)
+
+        self.gen_params = gen_params
+
+        self.signals: dict[str, Signal] = {}
+
+        # add the metric to the global list
+        gen_params.get(DependencyManager).add_dependency(HwMetricsListKey(), self)
+
+    def add_registers(self, regs: list[HwMetricRegister]):
+        """
+        Adds registers to a metric. Should be only called by inheriting classes
+        during initialization.
+
+        Parameters
+        ----------
+        regs: list[HwMetricRegister]
+            A list of registers to be registered.
+        """
+        for reg in regs:
+            if reg.name in self.regs:
+                raise RuntimeError(f"Register {reg.name}' is already added to the metric {self.fully_qualified_name}")
+
+            self.regs[reg.name] = reg
+            self.signals[reg.name] = reg.value
+
+
+class HwCounter(Elaboratable, HwMetric):
+    """Hardware Counter
+
+    The most basic hardware metric that can just increase its value.
+    """
+
+    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str = "", *, width_bits: int = 0):
+        """
+        Parameters
+        ----------
+        gen_params: GenParams
+            Core generation parameters.
+        fully_qualified_name: str
+            The fully qualified name of the metric.
+        description: str
+            A human-readable description of the metric's functionality.
+        width_bits: int
+            The bit-width of the register. If unspecified or equal to 0,
+            the register will have `xlen` bits width.
+        """
+
+        super().__init__(gen_params, fully_qualified_name, description)
+
+        self.count = HwMetricRegister(
+            "count", gen_params.isa.xlen if width_bits == 0 else width_bits, "the value of the counter"
+        )
+
+        self.add_registers([self.count])
+
+        self._incr = Method()
+
+    def elaborate(self, platform):
+        if not self.gen_params.hardware_metrics_enabled:
+            return Module()
+
+        m = TModule()
+
+        @def_method(m, self._incr)
+        def _():
+            m.d.sync += self.count.value.eq(self.count.value + 1)
+
+        return m
+
+    def incr(self, m: TModule):
+        """
+        Increases the value of the counter by 1.
+
+        Should be called in the body of either a transaction or a method.
+
+        Parameters
+        ----------
+        m: TModule
+            Transactron module
+        """
+        if not self.gen_params.hardware_metrics_enabled:
+            return
+
+        self._incr(m)
+
+    def incr_when(self, m: TModule, cond: ValueLike):
+        """
+        Conditionally increases the value of the counter by 1.
+
+        Should be called in the body of either a transaction or a method.
+
+        Parameters
+        ----------
+        m: TModule
+            Transactron module
+        cond: ValueLike
+            Signal to indicate if the counter should increase.
+        """
+
+        if not self.gen_params.hardware_metrics_enabled:
+            return
+
+        with m.If(cond):
+            self._incr(m)
+
+
+class HwExpHistogram(Elaboratable, HwMetric):
+    """Hardware Exponential Histogram
+
+    Represents the distribution of sampled data through a histogram. A histogram
+    samples observations (usually things like request durations or queue sizes) and counts
+    them in configurable buckets. The buckets are of exponential size. For example,
+    a histogram with 5 buckets would have the following value ranges:
+    [0, 1); [1, 2); [2, 4); [4, 8); [8, 16).
+
+    Additionally, the histogram tracks the number of observations, the sum
+    of observed values, and the minimum and maximum values.
+    """
+
+    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str = "", *, max_value: int):
+        """
+        Parameters
+        ----------
+        gen_params: GenParams
+            Core generation parameters.
+        fully_qualified_name: str
+            The fully qualified name of the metric.
+        description: str
+            A human-readable description of the metric's functionality.
+        max_value: int
+            The maximum value that the histogram would be able to count. This
+            value is used to calculate the number of buckets.
+        """
+
+        super().__init__(gen_params, fully_qualified_name, description)
+        self.max_sample_width = bits_for(max_value)
+        self.bucket_count = self.max_sample_width + 1
+
+        self._add = Method(i=[("sample", self.max_sample_width)])
+
+        self.count = HwMetricRegister("count", gen_params.isa.xlen, "the count of events that have been observed")
+        self.sum = HwMetricRegister("sum", gen_params.isa.xlen, "the total sum of all observed values")
+        self.min = HwMetricRegister(
+            "min",
+            self.max_sample_width,
+            "the minimum of all observed values",
+            reset=(1 << self.max_sample_width) - 1,
+        )
+        self.max = HwMetricRegister("max", self.max_sample_width, "the maximum of all observed values")
+        self.buckets = [
+            HwMetricRegister(
+                f"bucket-{2**i}",
+                gen_params.isa.xlen,
+                f"the cumulative counter for the observation bucket [{0 if i == 0 else 2**(i-1)}, {2**i})",
+            )
+            for i in range(self.bucket_count)
+        ]
+
+        self.add_registers([self.count, self.sum, self.max, self.min] + self.buckets)
+
+    def elaborate(self, platform):
+        if not self.gen_params.hardware_metrics_enabled:
+            return Module()
+
+        m = TModule()
+
+        @def_method(m, self._add)
+        def _(sample):
+            m.d.sync += self.count.value.eq(self.count.value + 1)
+            m.d.sync += self.sum.value.eq(self.sum.value + sample)
+
+            with m.If(sample > self.max.value):
+                m.d.sync += self.max.value.eq(sample)
+
+            with m.If(sample < self.min.value):
+                m.d.sync += self.min.value.eq(sample)
+
+            # No bits are set - goes to the first bucket.
+            with m.If(sample == 0):
+                m.d.sync += self.buckets[0].value.eq(self.buckets[0].value + 1)
+
+            # todo: perhaps replace with a recursive implementation of the priority encoder
+            bucket_idx = Signal(range(self.max_sample_width))
+            for i in range(self.max_sample_width):
+                with m.If(sample[i]):
+                    m.d.av_comb += bucket_idx.eq(i)
+
+            for i, bucket in enumerate(self.buckets):
+                if i == 0:
+                    continue
+
+                with m.If(bucket_idx == i - 1):
+                    m.d.sync += bucket.value.eq(bucket.value + 1)
+
+        return m
+
+    def add(self, m: TModule, sample: Value):
+        """
+        Adds a new sample to the histogram.
+
+        Should be called in the body of either a transaction or a method.
+
+        Parameters
+        ----------
+        m: TModule
+            Transactron module
+        sample: ValueLike
+            The value that will be added to the histogram
+        """
+
+        if not self.gen_params.hardware_metrics_enabled:
+            return
+
+        self._add(m, sample)
+
+
+class LatencyMeasurer(Elaboratable):
+    """
+    Measures duration between two events, e.g. request processing latency.
+    It can track multiple events at the same time, i.e. the second event can
+    be registered as started, before the first finishes. However, they must be
+    processed in the FIFO order.
+
+    The module exposes an exponential histogram of the measured latencies.
+    """
+
+    def __init__(
+        self,
+        gen_params: GenParams,
+        fully_qualified_name: str,
+        description: str = "",
+        *,
+        slots_number: int,
+        max_latency: int,
+    ):
+        """
+        Parameters
+        ----------
+        gen_params: GenParams
+            Core generation parameters.
+        fully_qualified_name: str
+            The fully qualified name of the metric.
+        slots_number: str
+            A number of events that the module can track simultaneously.
+        max_latency: int
+            The maximum latency of an event. Used to set signal widths and
+            number of buckets in the histogram. If a latency turns to be
+            bigger than the maximum, it will overflow and result in a false
+            measurement.
+        """
+        self.gen_params = gen_params
+        self.fully_qualified_name = fully_qualified_name
+        self.description = description
+        self.slots_number = slots_number
+        self.max_latency = max_latency
+
+        self._start = Method()
+        self._stop = Method()
+
+    def elaborate(self, platform):
+        if not self.gen_params.hardware_metrics_enabled:
+            return Module()
+
+        m = TModule()
+
+        epoch_width = bits_for(self.max_latency)
+
+        m.submodules.fifo = self.fifo = FIFO([("epoch", epoch_width)], self.slots_number)
+        m.submodules.histogram = self.histogram = HwExpHistogram(
+            self.gen_params, self.fully_qualified_name, self.description, max_value=self.max_latency
+        )
+
+        epoch = Signal(epoch_width)
+
+        m.d.sync += epoch.eq(epoch + 1)
+
+        @def_method(m, self._start)
+        def _():
+            self.fifo.write(m, epoch)
+
+        @def_method(m, self._stop)
+        def _():
+            ret = self.fifo.read(m)
+            # The result of substracting two unsigned n-bit is a signed (n+1)-bit value,
+            # so we need to cast the result and discard the most significant bit.
+            duration = (epoch - ret.epoch).as_unsigned()[:-1]
+            self.histogram.add(m, duration)
+
+        return m
+
+    def start(self, m: TModule):
+        """
+        Registers the start of an event. Can be called before the previous events
+        finish. If there are no slots available, the method will be blocked.
+
+        Should be called in the body of either a transaction or a method.
+
+        Parameters
+        ----------
+        m: TModule
+            Transactron module
+        """
+
+        if not self.gen_params.hardware_metrics_enabled:
+            return
+
+        self._start(m)
+
+    def stop(self, m: TModule):
+        """
+        Registers the end of the oldest event (the FIFO order). If there are no
+        started events in the queue, the method will block.
+
+        Should be called in the body of either a transaction or a method.
+
+        Parameters
+        ----------
+        m: TModule
+            Transactron module
+        """
+
+        if not self.gen_params.hardware_metrics_enabled:
+            return
+
+        self._stop(m)
+
+
+class HardwareMetricsManager:
+    """
+    Collects all metrics registered in the core and provides an easy
+    access to them.
+    """
+
+    def __init__(self, dep_manager: DependencyManager):
+        """
+        Parameters
+        ----------
+        dep_manager: DependencyManager
+            The dependency manager of the core.
+        """
+        self.dep_manager = dep_manager
+
+        self._metrics: Optional[dict[str, HwMetric]] = None
+
+    def _collect_metrics(self) -> dict[str, HwMetric]:
+        # We lazily collect all metrics so that the metrics manager can be
+        # constructed at any time. Otherwise, if a metric object was created
+        # after the manager object had been created, that metric wouldn't end up
+        # being registered.
+        metrics: dict[str, HwMetric] = {}
+        for metric in self.dep_manager.get_dependency(HwMetricsListKey()):
+            if metric.fully_qualified_name in metrics:
+                raise RuntimeError(f"Metric '{metric.fully_qualified_name}' is already registered")
+
+            metrics[metric.fully_qualified_name] = metric
+
+        return metrics
+
+    def get_metrics(self) -> dict[str, HwMetric]:
+        """
+        Returns all metrics registered in the core.
+        """
+        if self._metrics is None:
+            self._metrics = self._collect_metrics()
+        return self._metrics
+
+    def get_register_value(self, metric_name: str, reg_name: str) -> Signal:
+        """
+        Returns the signal holding the register value of the given metric.
+
+        Parameters
+        ----------
+        metric_name: str
+            The name of the metric.
+        reg_name: str
+            The name of the register.
+        """
+
+        metrics = self.get_metrics()
+        if metric_name not in metrics:
+            raise RuntimeError(f"Couldn't find metric '{metric_name}'")
+        return metrics[metric_name].signals[reg_name]
+
+    def debug_signals(self) -> SignalBundle:
+        """
+        Returns tree-like SignalBundle composed of all metric registers.
+        """
+        metrics = self.get_metrics()
+
+        def rec(metric_names: list[str], depth: int = 1):
+            bundle: list[SignalBundle] = []
+            components: dict[str, list[str]] = {}
+
+            for metric in metric_names:
+                parts = metric.split(".")
+
+                if len(parts) == depth:
+                    signals = metrics[metric].signals
+                    reg_values = [signals[reg_name] for reg_name in signals]
+
+                    bundle.append({metric: reg_values})
+
+                    continue
+
+                component_prefix = ".".join(parts[:depth])
+
+                if component_prefix not in components:
+                    components[component_prefix] = []
+                components[component_prefix].append(metric)
+
+            for component_name, elements in components.items():
+                bundle.append({component_name: rec(elements, depth + 1)})
+
+            return bundle
+
+        return {"metrics": rec(list(self.get_metrics().keys()))}
diff --git a/test/structs_common/test_hw_metrics.py b/test/structs_common/test_hw_metrics.py
new file mode 100644
index 000000000..564ec89c3
--- /dev/null
+++ b/test/structs_common/test_hw_metrics.py
@@ -0,0 +1,305 @@
+import json
+import random
+
+from amaranth import *
+
+from coreblocks.structs_common.hw_metrics import HwCounter, HwExpHistogram, HardwareMetricsManager
+from coreblocks.params import GenParams
+from coreblocks.params.configurations import test_core_config
+from transactron import *
+from transactron.utils import DependencyManager
+
+from ..common import *
+
+
+class CounterInMethodCircuit(Elaboratable):
+    def __init__(self, gen_params: GenParams):
+        self.method = Method()
+        self.counter = HwCounter(gen_params, "in_method")
+
+    def elaborate(self, platform):
+        m = TModule()
+
+        m.submodules.counter = self.counter
+
+        @def_method(m, self.method)
+        def _():
+            self.counter.incr(m)
+
+        return m
+
+
+class CounterWithConditionInMethodCircuit(Elaboratable):
+    def __init__(self, gen_params: GenParams):
+        self.method = Method(i=[("cond", 1)])
+        self.counter = HwCounter(gen_params, "with_condition_in_method")
+
+    def elaborate(self, platform):
+        m = TModule()
+
+        m.submodules.counter = self.counter
+
+        @def_method(m, self.method)
+        def _(cond):
+            self.counter.incr_when(m, cond)
+
+        return m
+
+
+class CounterWithoutMethodCircuit(Elaboratable):
+    def __init__(self, gen_params: GenParams):
+        self.cond = Signal()
+        self.counter = HwCounter(gen_params, "with_condition_without_method")
+
+    def elaborate(self, platform):
+        m = TModule()
+
+        m.submodules.counter = self.counter
+
+        with Transaction().body(m):
+            self.counter.incr_when(m, self.cond)
+
+        return m
+
+
+class TestHwCounter(TestCaseWithSimulator):
+    def setUp(self) -> None:
+        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
+
+        random.seed(42)
+
+    def test_counter_in_method(self):
+        m = SimpleTestCircuit(CounterInMethodCircuit(self.gen_params))
+
+        def test_process():
+            called_cnt = 0
+            for _ in range(200):
+                call_now = random.randint(0, 1) == 0
+
+                if call_now:
+                    yield from m.method.call()
+                else:
+                    yield
+
+                val = yield m._dut.counter.count.value
+                self.assertEqual(val, called_cnt)
+
+                if call_now:
+                    called_cnt += 1
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(test_process)
+
+    def test_counter_with_condition_in_method(self):
+        m = SimpleTestCircuit(CounterWithConditionInMethodCircuit(self.gen_params))
+
+        def test_process():
+            called_cnt = 0
+            for _ in range(200):
+                call_now = random.randint(0, 1) == 0
+                condition = random.randint(0, 1)
+
+                if call_now:
+                    yield from m.method.call(cond=condition)
+                else:
+                    yield
+
+                val = yield m._dut.counter.count.value
+                self.assertEqual(val, called_cnt)
+
+                if call_now and condition == 1:
+                    called_cnt += 1
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(test_process)
+
+    def test_counter_with_condition_without_method(self):
+        m = CounterWithoutMethodCircuit(self.gen_params)
+
+        def test_process():
+            called_cnt = 0
+            for _ in range(200):
+                condition = random.randint(0, 1)
+
+                yield m.cond.eq(condition)
+                yield
+
+                val = yield m.counter.count.value
+                self.assertEqual(val, called_cnt)
+
+                if condition == 1:
+                    called_cnt += 1
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(test_process)
+
+
+class ExpHistogramCircuit(Elaboratable):
+    def __init__(self, gen_params: GenParams, max_value: int):
+        self.method = Method(i=data_layout(32))
+        self.histogram = HwExpHistogram(gen_params, "histogram", max_value=max_value)
+
+    def elaborate(self, platform):
+        m = TModule()
+
+        m.submodules.histogram = self.histogram
+
+        @def_method(m, self.method)
+        def _(data):
+            self.histogram.add(m, data[0 : self.histogram.max_sample_width])
+
+        return m
+
+
+class TestHwHistogram(TestCaseWithSimulator):
+    def setUp(self) -> None:
+        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
+
+        random.seed(42)
+
+    def test_counter_in_method(self):
+        max_value = 100
+
+        m = SimpleTestCircuit(ExpHistogramCircuit(self.gen_params, max_value))
+
+        def test_process():
+            min = max_value + 1
+            max = 0
+            sum = 0
+            count = 0
+
+            bucket_cnt = 8
+            buckets = [0] * bucket_cnt
+
+            for _ in range(500):
+                if random.randrange(3) == 0:
+                    value = random.randint(0, max_value)
+                    value = 6
+                    if value < min:
+                        min = value
+                    if value > max:
+                        max = value
+                    sum += value
+                    count += 1
+                    for i in range(bucket_cnt):
+                        if value < 2**i:
+                            buckets[i] += 1
+                            break
+
+                    yield from m.method.call(data=value)
+                    yield
+                else:
+                    yield
+
+                histogram = m._dut.histogram
+                # Skip the assertion if the min is still uninitialized
+                if min != max_value + 1:
+                    self.assertEqual(min, (yield histogram.min.value))
+
+                self.assertEqual(max, (yield histogram.max.value))
+                self.assertEqual(sum, (yield histogram.sum.value))
+                self.assertEqual(count, (yield histogram.count.value))
+
+                for i in range(bucket_cnt):
+                    self.assertEqual(buckets[i], (yield histogram.buckets[i].value))
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(test_process)
+
+
+class MetricManagerTestCircuit(Elaboratable):
+    def __init__(self, gen_params: GenParams):
+        self.incr_counters = Method(i=[("counter1", 1), ("counter2", 1), ("counter3", 1)])
+
+        self.counter1 = HwCounter(gen_params, "foo.counter1", "this is the description")
+        self.counter2 = HwCounter(gen_params, "bar.baz.counter2")
+        self.counter3 = HwCounter(gen_params, "bar.baz.counter3", "yet another description")
+
+    def elaborate(self, platform):
+        m = TModule()
+
+        m.submodules += [self.counter1, self.counter2, self.counter3]
+
+        @def_method(m, self.incr_counters)
+        def _(counter1, counter2, counter3):
+            self.counter1.incr_when(m, counter1)
+            self.counter2.incr_when(m, counter2)
+            self.counter3.incr_when(m, counter3)
+
+        return m
+
+
+class TestMetricsManager(TestCaseWithSimulator):
+    def setUp(self) -> None:
+        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
+        self.metrics_manager = HardwareMetricsManager(self.gen_params.get(DependencyManager))
+
+        random.seed(42)
+
+    def test_metrics_metadata(self):
+        # We need to initialize the circuit to make sure that metrics are registered
+        # in the dependency manager.
+        m = MetricManagerTestCircuit(self.gen_params)
+
+        # Run the simulation so Amaranth doesn't scream that we have unused elaboratables.
+        with self.run_simulation(m):
+            pass
+
+        self.assertEqual(
+            self.metrics_manager.get_metrics()["foo.counter1"].to_json(),  # type: ignore
+            json.dumps(
+                {
+                    "fully_qualified_name": "foo.counter1",
+                    "description": "this is the description",
+                    "regs": {"count": {"name": "count", "description": "the value of the counter", "width": 32}},
+                }
+            ),
+        )
+
+        self.assertEqual(
+            self.metrics_manager.get_metrics()["bar.baz.counter2"].to_json(),  # type: ignore
+            json.dumps(
+                {
+                    "fully_qualified_name": "bar.baz.counter2",
+                    "description": "",
+                    "regs": {"count": {"name": "count", "description": "the value of the counter", "width": 32}},
+                }
+            ),
+        )
+
+        self.assertEqual(
+            self.metrics_manager.get_metrics()["bar.baz.counter3"].to_json(),  # type: ignore
+            json.dumps(
+                {
+                    "fully_qualified_name": "bar.baz.counter3",
+                    "description": "yet another description",
+                    "regs": {"count": {"name": "count", "description": "the value of the counter", "width": 32}},
+                }
+            ),
+        )
+
+    def test_returned_reg_values(self):
+        m = SimpleTestCircuit(MetricManagerTestCircuit(self.gen_params))
+
+        def test_process():
+            counters = [0] * 3
+            for _ in range(200):
+                rand = [random.randint(0, 1) for _ in range(3)]
+
+                yield from m.incr_counters.call(counter1=rand[0], counter2=rand[1], counter3=rand[2])
+                yield
+
+                for i in range(3):
+                    if rand[i] == 1:
+                        counters[i] += 1
+
+                self.assertEqual(counters[0], (yield self.metrics_manager.get_register_value("foo.counter1", "count")))
+                self.assertEqual(
+                    counters[1], (yield self.metrics_manager.get_register_value("bar.baz.counter2", "count"))
+                )
+                self.assertEqual(
+                    counters[2], (yield self.metrics_manager.get_register_value("bar.baz.counter3", "count"))
+                )
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(test_process)

From ca9d42935814f63b017c6d6d1ffc7c77ea7d4c58 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Fri, 2 Feb 2024 12:32:36 +0100
Subject: [PATCH 2/9] Review comments & tests for LatencyMeasurer

---
 coreblocks/structs_common/hw_metrics.py | 130 +++++++++++--------
 test/structs_common/test_hw_metrics.py  | 165 +++++++++++++++++++-----
 2 files changed, 210 insertions(+), 85 deletions(-)

diff --git a/coreblocks/structs_common/hw_metrics.py b/coreblocks/structs_common/hw_metrics.py
index 659bc9e38..1662a90ee 100644
--- a/coreblocks/structs_common/hw_metrics.py
+++ b/coreblocks/structs_common/hw_metrics.py
@@ -13,6 +13,16 @@
 from coreblocks.params.genparams import GenParams
 from transactron.utils.dependencies import DependencyManager, ListKey
 
+__all__ = [
+    "MetricRegisterModel",
+    "MetricModel",
+    "HwMetric",
+    "HwCounter",
+    "HwExpHistogram",
+    "LatencyMeasurer",
+    "HardwareMetricsManager",
+]
+
 
 @dataclass_json
 @dataclass(frozen=True)
@@ -45,7 +55,7 @@ class MetricModel:
     comprises multiple registers, each dedicated to storing specific values.
 
     The configuration of registers is internally determined by a
-    specific metric typ eand is not user-configurable.
+    specific metric type and is not user-configurable.
 
     Attributes
     ----------
@@ -186,7 +196,7 @@ def __init__(self, gen_params: GenParams, fully_qualified_name: str, description
 
     def elaborate(self, platform):
         if not self.gen_params.hardware_metrics_enabled:
-            return Module()
+            return TModule()
 
         m = TModule()
 
@@ -196,7 +206,7 @@ def _():
 
         return m
 
-    def incr(self, m: TModule):
+    def incr(self, m: TModule, *, cond: ValueLike = C(1)):
         """
         Increases the value of the counter by 1.
 
@@ -207,25 +217,6 @@ def incr(self, m: TModule):
         m: TModule
             Transactron module
         """
-        if not self.gen_params.hardware_metrics_enabled:
-            return
-
-        self._incr(m)
-
-    def incr_when(self, m: TModule, cond: ValueLike):
-        """
-        Conditionally increases the value of the counter by 1.
-
-        Should be called in the body of either a transaction or a method.
-
-        Parameters
-        ----------
-        m: TModule
-            Transactron module
-        cond: ValueLike
-            Signal to indicate if the counter should increase.
-        """
-
         if not self.gen_params.hardware_metrics_enabled:
             return
 
@@ -238,15 +229,24 @@ class HwExpHistogram(Elaboratable, HwMetric):
 
     Represents the distribution of sampled data through a histogram. A histogram
     samples observations (usually things like request durations or queue sizes) and counts
-    them in configurable buckets. The buckets are of exponential size. For example,
+    them in a configurable number of buckets. The buckets are of exponential size. For example,
     a histogram with 5 buckets would have the following value ranges:
-    [0, 1); [1, 2); [2, 4); [4, 8); [8, 16).
+    [0, 1); [1, 2); [2, 4); [4, 8); [8, +inf).
 
     Additionally, the histogram tracks the number of observations, the sum
     of observed values, and the minimum and maximum values.
     """
 
-    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str = "", *, max_value: int):
+    def __init__(
+        self,
+        gen_params: GenParams,
+        fully_qualified_name: str,
+        description: str = "",
+        *,
+        bucket_count: int,
+        sample_width: int = 32,
+        registers_width: int = 32,
+    ):
         """
         Parameters
         ----------
@@ -262,34 +262,39 @@ def __init__(self, gen_params: GenParams, fully_qualified_name: str, description
         """
 
         super().__init__(gen_params, fully_qualified_name, description)
-        self.max_sample_width = bits_for(max_value)
-        self.bucket_count = self.max_sample_width + 1
+        self.bucket_count = bucket_count
+        self.sample_width = sample_width
 
-        self._add = Method(i=[("sample", self.max_sample_width)])
+        self._add = Method(i=[("sample", self.sample_width)])
 
-        self.count = HwMetricRegister("count", gen_params.isa.xlen, "the count of events that have been observed")
-        self.sum = HwMetricRegister("sum", gen_params.isa.xlen, "the total sum of all observed values")
+        self.count = HwMetricRegister("count", registers_width, "the count of events that have been observed")
+        self.sum = HwMetricRegister("sum", registers_width, "the total sum of all observed values")
         self.min = HwMetricRegister(
             "min",
-            self.max_sample_width,
+            self.sample_width,
             "the minimum of all observed values",
-            reset=(1 << self.max_sample_width) - 1,
+            reset=(1 << self.sample_width) - 1,
         )
-        self.max = HwMetricRegister("max", self.max_sample_width, "the maximum of all observed values")
-        self.buckets = [
-            HwMetricRegister(
-                f"bucket-{2**i}",
-                gen_params.isa.xlen,
-                f"the cumulative counter for the observation bucket [{0 if i == 0 else 2**(i-1)}, {2**i})",
+        self.max = HwMetricRegister("max", self.sample_width, "the maximum of all observed values")
+
+        self.buckets = []
+        for i in range(self.bucket_count):
+            bucket_start = 0 if i == 0 else 2 ** (i - 1)
+            bucket_end = "inf" if i == self.bucket_count - 1 else 2**i
+
+            self.buckets.append(
+                HwMetricRegister(
+                    f"bucket-{bucket_end}",
+                    registers_width,
+                    f"the cumulative counter for the observation bucket [{bucket_start}, {bucket_end})",
+                )
             )
-            for i in range(self.bucket_count)
-        ]
 
         self.add_registers([self.count, self.sum, self.max, self.min] + self.buckets)
 
     def elaborate(self, platform):
         if not self.gen_params.hardware_metrics_enabled:
-            return Module()
+            return TModule()
 
         m = TModule()
 
@@ -304,21 +309,24 @@ def _(sample):
             with m.If(sample < self.min.value):
                 m.d.sync += self.min.value.eq(sample)
 
-            # No bits are set - goes to the first bucket.
-            with m.If(sample == 0):
-                m.d.sync += self.buckets[0].value.eq(self.buckets[0].value + 1)
-
             # todo: perhaps replace with a recursive implementation of the priority encoder
-            bucket_idx = Signal(range(self.max_sample_width))
-            for i in range(self.max_sample_width):
+            bucket_idx = Signal(range(self.sample_width))
+            for i in range(self.sample_width):
                 with m.If(sample[i]):
                     m.d.av_comb += bucket_idx.eq(i)
 
             for i, bucket in enumerate(self.buckets):
+                should_incr = C(0)
                 if i == 0:
-                    continue
-
-                with m.If(bucket_idx == i - 1):
+                    # The first bucket has a range [0, 1).
+                    should_incr = sample == 0
+                elif i == self.bucket_count - 1:
+                    # The last bucket should count values bigger or equal to 2**(self.bucket_count-1)
+                    should_incr = (bucket_idx >= i - 1) & (sample != 0)
+                else:
+                    should_incr = (bucket_idx == i - 1) & (sample != 0)
+
+                with m.If(should_incr):
                     m.d.sync += bucket.value.eq(bucket.value + 1)
 
         return m
@@ -386,18 +394,26 @@ def __init__(
         self._start = Method()
         self._stop = Method()
 
+        # This bucket count gives us the best possible granularity.
+        bucket_count = bits_for(self.max_latency) + 1
+        self.histogram = HwExpHistogram(
+            self.gen_params,
+            self.fully_qualified_name,
+            self.description,
+            bucket_count=bucket_count,
+            sample_width=bits_for(self.max_latency),
+        )
+
     def elaborate(self, platform):
         if not self.gen_params.hardware_metrics_enabled:
-            return Module()
+            return TModule()
 
         m = TModule()
 
         epoch_width = bits_for(self.max_latency)
 
         m.submodules.fifo = self.fifo = FIFO([("epoch", epoch_width)], self.slots_number)
-        m.submodules.histogram = self.histogram = HwExpHistogram(
-            self.gen_params, self.fully_qualified_name, self.description, max_value=self.max_latency
-        )
+        m.submodules.histogram = self.histogram
 
         epoch = Signal(epoch_width)
 
@@ -500,9 +516,11 @@ def get_register_value(self, metric_name: str, reg_name: str) -> Signal:
         Parameters
         ----------
         metric_name: str
-            The name of the metric.
+            The fully qualified name of the metric, for example 'frontend.icache.loads'.
         reg_name: str
-            The name of the register.
+            The name of the register from that metric, for example if
+            the metric is a histogram, the 'reg_name' could be 'min'
+            or 'bucket-32'.
         """
 
         metrics = self.get_metrics()
diff --git a/test/structs_common/test_hw_metrics.py b/test/structs_common/test_hw_metrics.py
index 564ec89c3..19e81d2d2 100644
--- a/test/structs_common/test_hw_metrics.py
+++ b/test/structs_common/test_hw_metrics.py
@@ -1,9 +1,11 @@
 import json
 import random
+import queue
+from parameterized import parameterized_class
 
 from amaranth import *
 
-from coreblocks.structs_common.hw_metrics import HwCounter, HwExpHistogram, HardwareMetricsManager
+from coreblocks.structs_common.hw_metrics import *
 from coreblocks.params import GenParams
 from coreblocks.params.configurations import test_core_config
 from transactron import *
@@ -41,7 +43,7 @@ def elaborate(self, platform):
 
         @def_method(m, self.method)
         def _(cond):
-            self.counter.incr_when(m, cond)
+            self.counter.incr(m, cond=cond)
 
         return m
 
@@ -57,7 +59,7 @@ def elaborate(self, platform):
         m.submodules.counter = self.counter
 
         with Transaction().body(m):
-            self.counter.incr_when(m, self.cond)
+            self.counter.incr(m, cond=self.cond)
 
         return m
 
@@ -81,8 +83,9 @@ def test_process():
                 else:
                     yield
 
-                val = yield m._dut.counter.count.value
-                self.assertEqual(val, called_cnt)
+                # Note that it takes one cycle to update the register value, so here
+                # we are comparing the "previous" values.
+                self.assertEqual(called_cnt, (yield m._dut.counter.count.value))
 
                 if call_now:
                     called_cnt += 1
@@ -104,8 +107,9 @@ def test_process():
                 else:
                     yield
 
-                val = yield m._dut.counter.count.value
-                self.assertEqual(val, called_cnt)
+                # Note that it takes one cycle to update the register value, so here
+                # we are comparing the "previous" values.
+                self.assertEqual(called_cnt, (yield m._dut.counter.count.value))
 
                 if call_now and condition == 1:
                     called_cnt += 1
@@ -124,8 +128,9 @@ def test_process():
                 yield m.cond.eq(condition)
                 yield
 
-                val = yield m.counter.count.value
-                self.assertEqual(val, called_cnt)
+                # Note that it takes one cycle to update the register value, so here
+                # we are comparing the "previous" values.
+                self.assertEqual(called_cnt, (yield m.counter.count.value))
 
                 if condition == 1:
                     called_cnt += 1
@@ -135,9 +140,11 @@ def test_process():
 
 
 class ExpHistogramCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams, max_value: int):
+    def __init__(self, gen_params: GenParams, bucket_cnt: int, sample_width: int):
+        self.sample_width = sample_width
+
         self.method = Method(i=data_layout(32))
-        self.histogram = HwExpHistogram(gen_params, "histogram", max_value=max_value)
+        self.histogram = HwExpHistogram(gen_params, "histogram", bucket_count=bucket_cnt, sample_width=sample_width)
 
     def elaborate(self, platform):
         m = TModule()
@@ -146,46 +153,57 @@ def elaborate(self, platform):
 
         @def_method(m, self.method)
         def _(data):
-            self.histogram.add(m, data[0 : self.histogram.max_sample_width])
+            self.histogram.add(m, data[0 : self.sample_width])
 
         return m
 
 
+@parameterized_class(
+    ("bucket_count", "sample_width"),
+    [
+        (5, 5),  # last bucket is [8, inf), max sample=31
+        (8, 5),  # last bucket is [64, inf), max sample=31
+        (8, 6),  # last bucket is [64, inf), max sample=63
+        (8, 20),  # last bucket is [64, inf), max sample=big
+    ],
+)
 class TestHwHistogram(TestCaseWithSimulator):
+    bucket_count: int
+    sample_width: int
+
     def setUp(self) -> None:
         self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
 
         random.seed(42)
 
-    def test_counter_in_method(self):
-        max_value = 100
+    def test_histogram(self):
+        m = SimpleTestCircuit(
+            ExpHistogramCircuit(self.gen_params, bucket_cnt=self.bucket_count, sample_width=self.sample_width)
+        )
 
-        m = SimpleTestCircuit(ExpHistogramCircuit(self.gen_params, max_value))
+        max_sample_value = 2**self.sample_width - 1
 
         def test_process():
-            min = max_value + 1
+            min = max_sample_value + 1
             max = 0
             sum = 0
             count = 0
 
-            bucket_cnt = 8
-            buckets = [0] * bucket_cnt
+            buckets = [0] * self.bucket_count
 
             for _ in range(500):
                 if random.randrange(3) == 0:
-                    value = random.randint(0, max_value)
-                    value = 6
+                    value = random.randint(0, max_sample_value)
                     if value < min:
                         min = value
                     if value > max:
                         max = value
                     sum += value
                     count += 1
-                    for i in range(bucket_cnt):
-                        if value < 2**i:
+                    for i in range(self.bucket_count):
+                        if value < 2**i or i == self.bucket_count - 1:
                             buckets[i] += 1
                             break
-
                     yield from m.method.call(data=value)
                     yield
                 else:
@@ -193,20 +211,109 @@ def test_process():
 
                 histogram = m._dut.histogram
                 # Skip the assertion if the min is still uninitialized
-                if min != max_value + 1:
+                if min != max_sample_value + 1:
                     self.assertEqual(min, (yield histogram.min.value))
 
                 self.assertEqual(max, (yield histogram.max.value))
                 self.assertEqual(sum, (yield histogram.sum.value))
                 self.assertEqual(count, (yield histogram.count.value))
 
-                for i in range(bucket_cnt):
-                    self.assertEqual(buckets[i], (yield histogram.buckets[i].value))
+                total_count = 0
+                for i in range(self.bucket_count):
+                    bucket_value = yield histogram.buckets[i].value
+                    total_count += bucket_value
+                    self.assertEqual(buckets[i], bucket_value)
+
+                # Sanity check if all buckets sum up to the total count value
+                self.assertEqual(total_count, (yield histogram.count.value))
 
         with self.run_simulation(m) as sim:
             sim.add_sync_process(test_process)
 
 
+@parameterized_class(
+    ("slots_number", "expected_consumer_wait"),
+    [
+        (2, 5),
+        (2, 10),
+        (5, 10),
+        (10, 1),
+        (10, 10),
+        (5, 5),
+    ],
+)
+class TestLatencyMeasurer(TestCaseWithSimulator):
+    slots_number: int
+    expected_consumer_wait: float
+
+    def setUp(self) -> None:
+        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
+
+        random.seed(42)
+
+    def test_latency_measurer(self):
+        m = SimpleTestCircuit(
+            LatencyMeasurer(self.gen_params, "latency", slots_number=self.slots_number, max_latency=300)
+        )
+
+        latencies: list[int] = []
+
+        event_queue = queue.Queue()
+
+        time = 0
+
+        def ticker():
+            nonlocal time
+
+            yield Passive()
+
+            while True:
+                yield
+                time += 1
+
+        finish = False
+
+        def producer():
+            nonlocal finish
+
+            for _ in range(200):
+                yield from m._start.call()
+
+                # Make sure that the time is updated first.
+                yield Settle()
+                event_queue.put(time)
+                yield from self.random_wait_geom(0.8)
+
+            finish = True
+
+        def consumer():
+            while not finish:
+                yield from m._stop.call()
+
+                # Make sure that the time is updated first.
+                yield Settle()
+                latencies.append(time - event_queue.get())
+
+                yield from self.random_wait_geom(1.0 / self.expected_consumer_wait)
+
+            self.assertEqual(min(latencies), (yield m._dut.histogram.min.value))
+            self.assertEqual(max(latencies), (yield m._dut.histogram.max.value))
+            self.assertEqual(sum(latencies), (yield m._dut.histogram.sum.value))
+            self.assertEqual(len(latencies), (yield m._dut.histogram.count.value))
+
+            for i in range(m._dut.histogram.bucket_count):
+                bucket_start = 0 if i == 0 else 2 ** (i - 1)
+                bucket_end = 1e10 if i == m._dut.histogram.bucket_count - 1 else 2**i
+
+                count = sum(1 for x in latencies if bucket_start <= x < bucket_end)
+                self.assertEqual(count, (yield m._dut.histogram.buckets[i].value))
+
+        with self.run_simulation(m) as sim:
+            sim.add_sync_process(producer)
+            sim.add_sync_process(consumer)
+            sim.add_sync_process(ticker)
+
+
 class MetricManagerTestCircuit(Elaboratable):
     def __init__(self, gen_params: GenParams):
         self.incr_counters = Method(i=[("counter1", 1), ("counter2", 1), ("counter3", 1)])
@@ -222,9 +329,9 @@ def elaborate(self, platform):
 
         @def_method(m, self.incr_counters)
         def _(counter1, counter2, counter3):
-            self.counter1.incr_when(m, counter1)
-            self.counter2.incr_when(m, counter2)
-            self.counter3.incr_when(m, counter3)
+            self.counter1.incr(m, cond=counter1)
+            self.counter2.incr(m, cond=counter2)
+            self.counter3.incr(m, cond=counter3)
 
         return m
 

From 653994f4b0493003f12c2fb736779e7b1a8a5481 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 13:46:38 +0100
Subject: [PATCH 3/9] Remove the dependency on GenParams

---
 coreblocks/structs_common/hw_metrics.py | 86 +++++++++++------------
 test/structs_common/test_hw_metrics.py  | 93 +++++++++++--------------
 transactron/utils/dependencies.py       | 10 +++
 3 files changed, 90 insertions(+), 99 deletions(-)

diff --git a/coreblocks/structs_common/hw_metrics.py b/coreblocks/structs_common/hw_metrics.py
index 1662a90ee..23ed99fd4 100644
--- a/coreblocks/structs_common/hw_metrics.py
+++ b/coreblocks/structs_common/hw_metrics.py
@@ -10,8 +10,7 @@
 from transactron import Method, def_method, TModule
 from transactron.utils import SignalBundle
 from transactron.lib import FIFO
-from coreblocks.params.genparams import GenParams
-from transactron.utils.dependencies import DependencyManager, ListKey
+from transactron.utils.dependencies import ListKey, DependencyContext, SimpleKey
 
 __all__ = [
     "MetricRegisterModel",
@@ -21,6 +20,7 @@
     "HwExpHistogram",
     "LatencyMeasurer",
     "HardwareMetricsManager",
+    "HwMetricsEnabledKey",
 ]
 
 
@@ -106,10 +106,21 @@ def __init__(self, name: str, width_bits: int, description: str = "", reset: int
 class HwMetricsListKey(ListKey["HwMetric"]):
     """DependencyManager key collecting hardware metrics globally as a list."""
 
-    # This key is defined here, because it is only used internally by HwMetric and HardwareMetricsManager
     pass
 
 
+@dataclass(frozen=True)
+class HwMetricsEnabledKey(SimpleKey[bool]):
+    """
+    DependencyManager key for enabling hardware metrics. If metrics are disabled,
+    none of theirs signals will be synthesized.
+    """
+
+    lock_on_get = False
+    empty_valid = True
+    default_value = False
+
+
 class HwMetric(ABC, MetricModel):
     """
     A base for all metric implementations. It should be only used for declaring
@@ -119,18 +130,14 @@ class HwMetric(ABC, MetricModel):
 
     Attributes
     ----------
-    gen_params: GenParams
-        Core generation parameters.
     signals: dict[str, Signal]
         A mapping from a register name to a Signal containing the value of that register.
     """
 
-    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str):
+    def __init__(self, fully_qualified_name: str, description: str):
         """
         Parameters
         ----------
-        gen_params: GenParams
-            Core generation parameters.
         fully_qualified_name: str
             The fully qualified name of the metric.
         description: str
@@ -138,12 +145,10 @@ def __init__(self, gen_params: GenParams, fully_qualified_name: str, description
         """
         super().__init__(fully_qualified_name, description)
 
-        self.gen_params = gen_params
-
         self.signals: dict[str, Signal] = {}
 
-        # add the metric to the global list
-        gen_params.get(DependencyManager).add_dependency(HwMetricsListKey(), self)
+        # add the metric to the global list of all metrics
+        DependencyContext.get().add_dependency(HwMetricsListKey(), self)
 
     def add_registers(self, regs: list[HwMetricRegister]):
         """
@@ -162,6 +167,9 @@ def add_registers(self, regs: list[HwMetricRegister]):
             self.regs[reg.name] = reg
             self.signals[reg.name] = reg.value
 
+    def metrics_enabled(self) -> bool:
+        return DependencyContext.get().get_dependency(HwMetricsEnabledKey())
+
 
 class HwCounter(Elaboratable, HwMetric):
     """Hardware Counter
@@ -169,33 +177,28 @@ class HwCounter(Elaboratable, HwMetric):
     The most basic hardware metric that can just increase its value.
     """
 
-    def __init__(self, gen_params: GenParams, fully_qualified_name: str, description: str = "", *, width_bits: int = 0):
+    def __init__(self, fully_qualified_name: str, description: str = "", *, width_bits: int = 32):
         """
         Parameters
         ----------
-        gen_params: GenParams
-            Core generation parameters.
         fully_qualified_name: str
             The fully qualified name of the metric.
         description: str
             A human-readable description of the metric's functionality.
         width_bits: int
-            The bit-width of the register. If unspecified or equal to 0,
-            the register will have `xlen` bits width.
+            The bit-width of the register. Defaults to 32 bits.
         """
 
-        super().__init__(gen_params, fully_qualified_name, description)
+        super().__init__(fully_qualified_name, description)
 
-        self.count = HwMetricRegister(
-            "count", gen_params.isa.xlen if width_bits == 0 else width_bits, "the value of the counter"
-        )
+        self.count = HwMetricRegister("count", width_bits, "the value of the counter")
 
         self.add_registers([self.count])
 
         self._incr = Method()
 
     def elaborate(self, platform):
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return TModule()
 
         m = TModule()
@@ -217,7 +220,7 @@ def incr(self, m: TModule, *, cond: ValueLike = C(1)):
         m: TModule
             Transactron module
         """
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return
 
         with m.If(cond):
@@ -239,7 +242,6 @@ class HwExpHistogram(Elaboratable, HwMetric):
 
     def __init__(
         self,
-        gen_params: GenParams,
         fully_qualified_name: str,
         description: str = "",
         *,
@@ -250,8 +252,6 @@ def __init__(
         """
         Parameters
         ----------
-        gen_params: GenParams
-            Core generation parameters.
         fully_qualified_name: str
             The fully qualified name of the metric.
         description: str
@@ -261,7 +261,7 @@ def __init__(
             value is used to calculate the number of buckets.
         """
 
-        super().__init__(gen_params, fully_qualified_name, description)
+        super().__init__(fully_qualified_name, description)
         self.bucket_count = bucket_count
         self.sample_width = sample_width
 
@@ -293,7 +293,7 @@ def __init__(
         self.add_registers([self.count, self.sum, self.max, self.min] + self.buckets)
 
     def elaborate(self, platform):
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return TModule()
 
         m = TModule()
@@ -345,7 +345,7 @@ def add(self, m: TModule, sample: Value):
             The value that will be added to the histogram
         """
 
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return
 
         self._add(m, sample)
@@ -363,7 +363,6 @@ class LatencyMeasurer(Elaboratable):
 
     def __init__(
         self,
-        gen_params: GenParams,
         fully_qualified_name: str,
         description: str = "",
         *,
@@ -373,10 +372,10 @@ def __init__(
         """
         Parameters
         ----------
-        gen_params: GenParams
-            Core generation parameters.
         fully_qualified_name: str
             The fully qualified name of the metric.
+        description: str
+            A human-readable description of the metric's functionality.
         slots_number: str
             A number of events that the module can track simultaneously.
         max_latency: int
@@ -385,7 +384,6 @@ def __init__(
             bigger than the maximum, it will overflow and result in a false
             measurement.
         """
-        self.gen_params = gen_params
         self.fully_qualified_name = fully_qualified_name
         self.description = description
         self.slots_number = slots_number
@@ -397,7 +395,6 @@ def __init__(
         # This bucket count gives us the best possible granularity.
         bucket_count = bits_for(self.max_latency) + 1
         self.histogram = HwExpHistogram(
-            self.gen_params,
             self.fully_qualified_name,
             self.description,
             bucket_count=bucket_count,
@@ -405,7 +402,7 @@ def __init__(
         )
 
     def elaborate(self, platform):
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return TModule()
 
         m = TModule()
@@ -446,7 +443,7 @@ def start(self, m: TModule):
             Transactron module
         """
 
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return
 
         self._start(m)
@@ -464,11 +461,14 @@ def stop(self, m: TModule):
             Transactron module
         """
 
-        if not self.gen_params.hardware_metrics_enabled:
+        if not self.metrics_enabled():
             return
 
         self._stop(m)
 
+    def metrics_enabled(self) -> bool:
+        return DependencyContext.get().get_dependency(HwMetricsEnabledKey())
+
 
 class HardwareMetricsManager:
     """
@@ -476,15 +476,7 @@ class HardwareMetricsManager:
     access to them.
     """
 
-    def __init__(self, dep_manager: DependencyManager):
-        """
-        Parameters
-        ----------
-        dep_manager: DependencyManager
-            The dependency manager of the core.
-        """
-        self.dep_manager = dep_manager
-
+    def __init__(self):
         self._metrics: Optional[dict[str, HwMetric]] = None
 
     def _collect_metrics(self) -> dict[str, HwMetric]:
@@ -493,7 +485,7 @@ def _collect_metrics(self) -> dict[str, HwMetric]:
         # after the manager object had been created, that metric wouldn't end up
         # being registered.
         metrics: dict[str, HwMetric] = {}
-        for metric in self.dep_manager.get_dependency(HwMetricsListKey()):
+        for metric in DependencyContext.get().get_dependency(HwMetricsListKey()):
             if metric.fully_qualified_name in metrics:
                 raise RuntimeError(f"Metric '{metric.fully_qualified_name}' is already registered")
 
diff --git a/test/structs_common/test_hw_metrics.py b/test/structs_common/test_hw_metrics.py
index 19e81d2d2..ddea30500 100644
--- a/test/structs_common/test_hw_metrics.py
+++ b/test/structs_common/test_hw_metrics.py
@@ -4,20 +4,20 @@
 from parameterized import parameterized_class
 
 from amaranth import *
+from amaranth.sim import Passive, Settle
 
 from coreblocks.structs_common.hw_metrics import *
-from coreblocks.params import GenParams
-from coreblocks.params.configurations import test_core_config
 from transactron import *
-from transactron.utils import DependencyManager
+from transactron.testing import TestCaseWithSimulator, data_layout, SimpleTestCircuit
+from transactron.utils.dependencies import DependencyContext
 
 from ..common import *
 
 
 class CounterInMethodCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams):
+    def __init__(self):
         self.method = Method()
-        self.counter = HwCounter(gen_params, "in_method")
+        self.counter = HwCounter("in_method")
 
     def elaborate(self, platform):
         m = TModule()
@@ -32,9 +32,9 @@ def _():
 
 
 class CounterWithConditionInMethodCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams):
+    def __init__(self):
         self.method = Method(i=[("cond", 1)])
-        self.counter = HwCounter(gen_params, "with_condition_in_method")
+        self.counter = HwCounter("with_condition_in_method")
 
     def elaborate(self, platform):
         m = TModule()
@@ -49,9 +49,9 @@ def _(cond):
 
 
 class CounterWithoutMethodCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams):
+    def __init__(self):
         self.cond = Signal()
-        self.counter = HwCounter(gen_params, "with_condition_without_method")
+        self.counter = HwCounter("with_condition_without_method")
 
     def elaborate(self, platform):
         m = TModule()
@@ -66,12 +66,11 @@ def elaborate(self, platform):
 
 class TestHwCounter(TestCaseWithSimulator):
     def setUp(self) -> None:
-        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
-
         random.seed(42)
 
     def test_counter_in_method(self):
-        m = SimpleTestCircuit(CounterInMethodCircuit(self.gen_params))
+        m = SimpleTestCircuit(CounterInMethodCircuit())
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         def test_process():
             called_cnt = 0
@@ -94,7 +93,8 @@ def test_process():
             sim.add_sync_process(test_process)
 
     def test_counter_with_condition_in_method(self):
-        m = SimpleTestCircuit(CounterWithConditionInMethodCircuit(self.gen_params))
+        m = SimpleTestCircuit(CounterWithConditionInMethodCircuit())
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         def test_process():
             called_cnt = 0
@@ -118,7 +118,8 @@ def test_process():
             sim.add_sync_process(test_process)
 
     def test_counter_with_condition_without_method(self):
-        m = CounterWithoutMethodCircuit(self.gen_params)
+        m = CounterWithoutMethodCircuit()
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         def test_process():
             called_cnt = 0
@@ -140,11 +141,11 @@ def test_process():
 
 
 class ExpHistogramCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams, bucket_cnt: int, sample_width: int):
+    def __init__(self, bucket_cnt: int, sample_width: int):
         self.sample_width = sample_width
 
         self.method = Method(i=data_layout(32))
-        self.histogram = HwExpHistogram(gen_params, "histogram", bucket_count=bucket_cnt, sample_width=sample_width)
+        self.histogram = HwExpHistogram("histogram", bucket_count=bucket_cnt, sample_width=sample_width)
 
     def elaborate(self, platform):
         m = TModule()
@@ -171,15 +172,11 @@ class TestHwHistogram(TestCaseWithSimulator):
     bucket_count: int
     sample_width: int
 
-    def setUp(self) -> None:
-        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
-
+    def test_histogram(self):
         random.seed(42)
 
-    def test_histogram(self):
-        m = SimpleTestCircuit(
-            ExpHistogramCircuit(self.gen_params, bucket_cnt=self.bucket_count, sample_width=self.sample_width)
-        )
+        m = SimpleTestCircuit(ExpHistogramCircuit(bucket_cnt=self.bucket_count, sample_width=self.sample_width))
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         max_sample_value = 2**self.sample_width - 1
 
@@ -246,15 +243,11 @@ class TestLatencyMeasurer(TestCaseWithSimulator):
     slots_number: int
     expected_consumer_wait: float
 
-    def setUp(self) -> None:
-        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
-
+    def test_latency_measurer(self):
         random.seed(42)
 
-    def test_latency_measurer(self):
-        m = SimpleTestCircuit(
-            LatencyMeasurer(self.gen_params, "latency", slots_number=self.slots_number, max_latency=300)
-        )
+        m = SimpleTestCircuit(LatencyMeasurer("latency", slots_number=self.slots_number, max_latency=300))
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         latencies: list[int] = []
 
@@ -315,12 +308,12 @@ def consumer():
 
 
 class MetricManagerTestCircuit(Elaboratable):
-    def __init__(self, gen_params: GenParams):
+    def __init__(self):
         self.incr_counters = Method(i=[("counter1", 1), ("counter2", 1), ("counter3", 1)])
 
-        self.counter1 = HwCounter(gen_params, "foo.counter1", "this is the description")
-        self.counter2 = HwCounter(gen_params, "bar.baz.counter2")
-        self.counter3 = HwCounter(gen_params, "bar.baz.counter3", "yet another description")
+        self.counter1 = HwCounter("foo.counter1", "this is the description")
+        self.counter2 = HwCounter("bar.baz.counter2")
+        self.counter3 = HwCounter("bar.baz.counter3", "yet another description")
 
     def elaborate(self, platform):
         m = TModule()
@@ -337,23 +330,18 @@ def _(counter1, counter2, counter3):
 
 
 class TestMetricsManager(TestCaseWithSimulator):
-    def setUp(self) -> None:
-        self.gen_params = GenParams(test_core_config.replace(hardware_metrics=True))
-        self.metrics_manager = HardwareMetricsManager(self.gen_params.get(DependencyManager))
-
-        random.seed(42)
-
     def test_metrics_metadata(self):
         # We need to initialize the circuit to make sure that metrics are registered
         # in the dependency manager.
-        m = MetricManagerTestCircuit(self.gen_params)
+        m = MetricManagerTestCircuit()
+        metrics_manager = HardwareMetricsManager()
 
         # Run the simulation so Amaranth doesn't scream that we have unused elaboratables.
         with self.run_simulation(m):
             pass
 
         self.assertEqual(
-            self.metrics_manager.get_metrics()["foo.counter1"].to_json(),  # type: ignore
+            metrics_manager.get_metrics()["foo.counter1"].to_json(),  # type: ignore
             json.dumps(
                 {
                     "fully_qualified_name": "foo.counter1",
@@ -364,7 +352,7 @@ def test_metrics_metadata(self):
         )
 
         self.assertEqual(
-            self.metrics_manager.get_metrics()["bar.baz.counter2"].to_json(),  # type: ignore
+            metrics_manager.get_metrics()["bar.baz.counter2"].to_json(),  # type: ignore
             json.dumps(
                 {
                     "fully_qualified_name": "bar.baz.counter2",
@@ -375,7 +363,7 @@ def test_metrics_metadata(self):
         )
 
         self.assertEqual(
-            self.metrics_manager.get_metrics()["bar.baz.counter3"].to_json(),  # type: ignore
+            metrics_manager.get_metrics()["bar.baz.counter3"].to_json(),  # type: ignore
             json.dumps(
                 {
                     "fully_qualified_name": "bar.baz.counter3",
@@ -386,7 +374,12 @@ def test_metrics_metadata(self):
         )
 
     def test_returned_reg_values(self):
-        m = SimpleTestCircuit(MetricManagerTestCircuit(self.gen_params))
+        random.seed(42)
+
+        m = SimpleTestCircuit(MetricManagerTestCircuit())
+        metrics_manager = HardwareMetricsManager()
+
+        DependencyContext.get().add_dependency(HwMetricsEnabledKey(), True)
 
         def test_process():
             counters = [0] * 3
@@ -400,13 +393,9 @@ def test_process():
                     if rand[i] == 1:
                         counters[i] += 1
 
-                self.assertEqual(counters[0], (yield self.metrics_manager.get_register_value("foo.counter1", "count")))
-                self.assertEqual(
-                    counters[1], (yield self.metrics_manager.get_register_value("bar.baz.counter2", "count"))
-                )
-                self.assertEqual(
-                    counters[2], (yield self.metrics_manager.get_register_value("bar.baz.counter3", "count"))
-                )
+                self.assertEqual(counters[0], (yield metrics_manager.get_register_value("foo.counter1", "count")))
+                self.assertEqual(counters[1], (yield metrics_manager.get_register_value("bar.baz.counter2", "count")))
+                self.assertEqual(counters[2], (yield metrics_manager.get_register_value("bar.baz.counter3", "count")))
 
         with self.run_simulation(m) as sim:
             sim.add_sync_process(test_process)
diff --git a/transactron/utils/dependencies.py b/transactron/utils/dependencies.py
index f365e17b9..2d35843ec 100644
--- a/transactron/utils/dependencies.py
+++ b/transactron/utils/dependencies.py
@@ -62,9 +62,19 @@ class SimpleKey(Generic[T], DependencyKey[T, T]):
     Simple dependency keys are used when there is an one-to-one relation between
     keys and dependencies. If more than one dependency is added to a simple key,
     an error is raised.
+
+    Parameters
+    ----------
+    default_value: T
+        Specifies the value default returned when no dependencies are added. To
+        enable this, `empty_valid` must be True.
     """
 
+    default_value: T
+
     def combine(self, data: list[T]) -> T:
+        if len(data) == 0:
+            return self.default_value
         if len(data) != 1:
             raise RuntimeError(f"Key {self} assigned {len(data)} values, expected 1")
         return data[0]

From 38c56132a167fe67500266e960468c19ad231723 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 13:54:29 +0100
Subject: [PATCH 4/9] Remove metrics options from CoreConfiguration

---
 coreblocks/params/configurations.py | 4 ----
 coreblocks/params/genparams.py      | 2 --
 2 files changed, 6 deletions(-)

diff --git a/coreblocks/params/configurations.py b/coreblocks/params/configurations.py
index d8c1b0740..a289c09e5 100644
--- a/coreblocks/params/configurations.py
+++ b/coreblocks/params/configurations.py
@@ -48,8 +48,6 @@ class CoreConfiguration:
         Enables 16-bit Compressed Instructions extension.
     embedded: bool
         Enables Reduced Integer (E) extension.
-    hardware_metrics: bool
-        Enable hardware metrics. If disabled, metrics will not be synthesized.
     phys_regs_bits: int
         Size of the Physical Register File is 2**phys_regs_bits.
     rob_entries_bits: int
@@ -78,8 +76,6 @@ class CoreConfiguration:
     compressed: bool = False
     embedded: bool = False
 
-    hardware_metrics: bool = True
-
     phys_regs_bits: int = 6
     rob_entries_bits: int = 7
     start_pc: int = 0
diff --git a/coreblocks/params/genparams.py b/coreblocks/params/genparams.py
index a6205a128..916832b49 100644
--- a/coreblocks/params/genparams.py
+++ b/coreblocks/params/genparams.py
@@ -47,8 +47,6 @@ def __init__(self, cfg: CoreConfiguration):
             block_size_bits=cfg.icache_block_size_bits,
         )
 
-        self.hardware_metrics_enabled = cfg.hardware_metrics
-
         # Verification temporally disabled
         # if not optypes_required_by_extensions(self.isa.extensions) <= optypes_supported(func_units_config):
         #     raise Exception(f"Functional unit configuration fo not support all extension required by{isa_str}")

From 7fe256b8493315c7562d099054d773516307d3b3 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 20:29:54 +0100
Subject: [PATCH 5/9] Move metrics to transactron

---
 test/{structs_common => transactron}/test_hw_metrics.py         | 2 +-
 transactron/lib/__init__.py                                     | 1 +
 .../structs_common/hw_metrics.py => transactron/lib/metrics.py  | 0
 3 files changed, 2 insertions(+), 1 deletion(-)
 rename test/{structs_common => transactron}/test_hw_metrics.py (99%)
 rename coreblocks/structs_common/hw_metrics.py => transactron/lib/metrics.py (100%)

diff --git a/test/structs_common/test_hw_metrics.py b/test/transactron/test_hw_metrics.py
similarity index 99%
rename from test/structs_common/test_hw_metrics.py
rename to test/transactron/test_hw_metrics.py
index ddea30500..a9a4a0628 100644
--- a/test/structs_common/test_hw_metrics.py
+++ b/test/transactron/test_hw_metrics.py
@@ -6,7 +6,7 @@
 from amaranth import *
 from amaranth.sim import Passive, Settle
 
-from coreblocks.structs_common.hw_metrics import *
+from transactron.lib.metrics import *
 from transactron import *
 from transactron.testing import TestCaseWithSimulator, data_layout, SimpleTestCircuit
 from transactron.utils.dependencies import DependencyContext
diff --git a/transactron/lib/__init__.py b/transactron/lib/__init__.py
index c814b5e93..f6dd3ef0a 100644
--- a/transactron/lib/__init__.py
+++ b/transactron/lib/__init__.py
@@ -6,3 +6,4 @@
 from .reqres import *  # noqa: F401
 from .storage import *  # noqa: F401
 from .simultaneous import *  # noqa: F401
+from .metrics import *  # noqa: F401
diff --git a/coreblocks/structs_common/hw_metrics.py b/transactron/lib/metrics.py
similarity index 100%
rename from coreblocks/structs_common/hw_metrics.py
rename to transactron/lib/metrics.py

From 1116f65bbc737aa7998c27bae7f2b54a04bd7744 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 20:32:17 +0100
Subject: [PATCH 6/9] s/core/circuit/g

---
 transactron/lib/metrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/transactron/lib/metrics.py b/transactron/lib/metrics.py
index 23ed99fd4..0a9e79095 100644
--- a/transactron/lib/metrics.py
+++ b/transactron/lib/metrics.py
@@ -28,7 +28,7 @@
 @dataclass(frozen=True)
 class MetricRegisterModel:
     """
-    Represents a single register of a core metric, serving as a fundamental
+    Represents a single register of a metric, serving as a fundamental
     building block that holds a singular value.
 
     Attributes
@@ -51,7 +51,7 @@ class MetricRegisterModel:
 @dataclass
 class MetricModel:
     """
-    Provides information about a metric exposed by the core. Each metric
+    Provides information about a metric exposed by the circuit. Each metric
     comprises multiple registers, each dedicated to storing specific values.
 
     The configuration of registers is internally determined by a
@@ -472,7 +472,7 @@ def metrics_enabled(self) -> bool:
 
 class HardwareMetricsManager:
     """
-    Collects all metrics registered in the core and provides an easy
+    Collects all metrics registered in the circuit and provides an easy
     access to them.
     """
 
@@ -495,7 +495,7 @@ def _collect_metrics(self) -> dict[str, HwMetric]:
 
     def get_metrics(self) -> dict[str, HwMetric]:
         """
-        Returns all metrics registered in the core.
+        Returns all metrics registered in the circuit.
         """
         if self._metrics is None:
             self._metrics = self._collect_metrics()

From af803289563a2595a250d5a4fe9ea6c39e25b425 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 20:34:01 +0100
Subject: [PATCH 7/9] Rename file

---
 test/transactron/{test_hw_metrics.py => test_metrics.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/transactron/{test_hw_metrics.py => test_metrics.py} (100%)

diff --git a/test/transactron/test_hw_metrics.py b/test/transactron/test_metrics.py
similarity index 100%
rename from test/transactron/test_hw_metrics.py
rename to test/transactron/test_metrics.py

From d3077c3d5a4ff107fa7309af793b4910030704f9 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Sun, 11 Feb 2024 20:35:08 +0100
Subject: [PATCH 8/9] grammar

---
 transactron/utils/dependencies.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/transactron/utils/dependencies.py b/transactron/utils/dependencies.py
index 2d35843ec..e66a7a23b 100644
--- a/transactron/utils/dependencies.py
+++ b/transactron/utils/dependencies.py
@@ -66,8 +66,8 @@ class SimpleKey(Generic[T], DependencyKey[T, T]):
     Parameters
     ----------
     default_value: T
-        Specifies the value default returned when no dependencies are added. To
-        enable this, `empty_valid` must be True.
+        Specifies the default value returned when no dependencies are added. To
+        enable it `empty_valid` must be True.
     """
 
     default_value: T

From b304092754af25fb143e9cfc8dcd6ffd18d9f554 Mon Sep 17 00:00:00 2001
From: Jacob Urbanczyk <jacob@urbanczyk.dev>
Date: Mon, 12 Feb 2024 09:49:12 +0100
Subject: [PATCH 9/9] Fix imports in tests

---
 test/transactron/test_metrics.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/transactron/test_metrics.py b/test/transactron/test_metrics.py
index a9a4a0628..12acdfd27 100644
--- a/test/transactron/test_metrics.py
+++ b/test/transactron/test_metrics.py
@@ -11,8 +11,6 @@
 from transactron.testing import TestCaseWithSimulator, data_layout, SimpleTestCircuit
 from transactron.utils.dependencies import DependencyContext
 
-from ..common import *
-
 
 class CounterInMethodCircuit(Elaboratable):
     def __init__(self):