Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove func blocks unifier indirections #774

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions coreblocks/backend/annoucement.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ class ResultAnnouncement(Elaboratable):
`ManyToOneConnectTrans` to a FIFO.
"""

def __init__(
self, *, gen_params: GenParams, get_result: Method, rob_mark_done: Method, rs_update: Method, rf_write: Method
):
def __init__(self, *, gen_params: GenParams, get_result: Method, rob_mark_done: Method, announce: Method):
"""
Parameters
----------
Expand All @@ -33,18 +31,14 @@ def __init__(
which should be announced in core. This method assumes that results
from different FUs are already serialized.
rob_mark_done : Method
Method which is invoked to mark that instruction ended without exception.
rs_update : Method
Method which is invoked to pass value which is an output of finished instruction
to RS, so that RS can save it if there are instructions which wait for it.
rf_write : Method
Method which is invoked to save value which is an output of finished instruction to RF.
Method which is invoked to mark that instruction finished execution.
announce : Method
Method which is invoked to announce the computed register value to RF and RS.
"""

self.m_get_result = get_result
self.m_rob_mark_done = rob_mark_done
self.m_rs_update = rs_update
self.m_rf_write_val = rf_write
self.m_announce = announce

def debug_signals(self):
return [self.m_get_result.debug_signals()]
Expand All @@ -56,8 +50,7 @@ def elaborate(self, platform):
result = self.m_get_result(m)
self.m_rob_mark_done(m, rob_id=result.rob_id, exception=result.exception)

self.m_rf_write_val(m, reg_id=result.rp_dst, reg_val=result.result)
with m.If(result.rp_dst != 0):
self.m_rs_update(m, reg_id=result.rp_dst, reg_val=result.result)
self.m_announce(m, reg_id=result.rp_dst, reg_val=result.result)

return m
20 changes: 12 additions & 8 deletions coreblocks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
from transactron.lib import ConnectTrans, MethodProduct
from coreblocks.interface.layouts import *
from coreblocks.interface.keys import (
AnnounceKey,
FetchResumeKey,
CSRInstancesKey,
CommonBusDataKey,
FuncUnitResultKey,
)
from coreblocks.params.genparams import GenParams
from coreblocks.core_structs.rat import FRAT, RRAT
Expand Down Expand Up @@ -68,6 +70,7 @@ def __init__(self, *, gen_params: GenParams):
self.ROB = ReorderBuffer(gen_params=self.gen_params)

self.connections.add_dependency(CommonBusDataKey(), self.bus_master_data_adapter)
self.connections.add_dependency(AnnounceKey(), self.RF.write)

self.exception_information_register = ExceptionInformationRegister(
self.gen_params,
Expand All @@ -80,14 +83,6 @@ def __init__(self, *, gen_params: GenParams):
blocks=gen_params.func_units_config,
)

self.announcement = ResultAnnouncement(
gen_params=self.gen_params,
get_result=self.func_blocks_unifier.get_result,
rob_mark_done=self.ROB.mark_done,
rs_update=self.func_blocks_unifier.update,
rf_write=self.RF.write,
)

self.csr_generic = GenericCSRRegisters(self.gen_params)
self.connections.add_dependency(CSRInstancesKey(), self.csr_generic)

Expand Down Expand Up @@ -125,6 +120,15 @@ def elaborate(self, platform):
[self.frontend.consume_instr, core_counter.increment], combiner=drop_second_ret_value
)

func_get_result, func_unifier = self.connections.get_dependency(FuncUnitResultKey())
m.submodules.func_unifiers = ModuleConnector(**func_unifier)
announce, announce_unifier = self.connections.get_dependency(AnnounceKey())
m.submodules.announce_unifiers = ModuleConnector(**announce_unifier)

self.announcement = ResultAnnouncement(
gen_params=self.gen_params, get_result=func_get_result, rob_mark_done=self.ROB.mark_done, announce=announce
)

m.submodules.scheduler = Scheduler(
get_instr=get_instr.method,
get_free_reg=rf_allocator.alloc[0],
Expand Down
4 changes: 4 additions & 0 deletions coreblocks/func_blocks/csr/csr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
from coreblocks.func_blocks.interface.func_protocols import FuncBlock
from coreblocks.interface.layouts import FetchLayouts, FuncUnitLayouts, CSRUnitLayouts
from coreblocks.interface.keys import (
AnnounceKey,
CSRListKey,
FetchResumeKey,
CSRInstancesKey,
FuncUnitResultKey,
InstructionPrecommitKey,
ExceptionReportKey,
AsyncInterruptInsertSignalKey,
Expand Down Expand Up @@ -268,6 +270,8 @@ def get_module(self, gen_params: GenParams) -> FuncBlock:
connections = DependencyContext.get()
unit = CSRUnit(gen_params)
connections.add_dependency(FetchResumeKey(), unit.fetch_resume)
connections.add_dependency(FuncUnitResultKey(), unit.get_result)
connections.add_dependency(AnnounceKey(), unit.update)
return unit

def get_optypes(self) -> set[OpType]:
Expand Down
15 changes: 7 additions & 8 deletions coreblocks/func_blocks/fu/common/rs_func_block.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from collections.abc import Collection, Iterable
from amaranth import *
from dataclasses import dataclass

from transactron.utils import DependencyContext
from coreblocks.params import *
from .rs import RS, RSBase
from coreblocks.scheduler.wakeup_select import WakeupSelect
from transactron import Method, TModule
from coreblocks.func_blocks.interface.func_protocols import FuncUnit, FuncBlock
from transactron.lib import Collector
from coreblocks.arch import OpType
from coreblocks.interface.layouts import RSLayouts, FuncUnitLayouts
from coreblocks.interface.keys import AnnounceKey, FuncUnitResultKey

__all__ = ["RSFuncBlock", "RSBlockComponent"]

Expand All @@ -26,9 +28,6 @@ class RSFuncBlock(FuncBlock, Elaboratable):
RS select method.
update: Method
RS update method.
get_result: Method
Method used for getting single result out of one of the FUs. It uses
layout described by `FuncUnitLayouts`.
"""

def __init__(
Expand Down Expand Up @@ -65,7 +64,6 @@ def __init__(
self.insert = Method(i=self.rs_layouts.rs.insert_in)
self.select = Method(o=self.rs_layouts.rs.select_out)
self.update = Method(i=self.rs_layouts.rs.update_in)
self.get_result = Method(o=self.fu_layouts.accept)

def elaborate(self, platform):
m = TModule()
Expand All @@ -87,12 +85,9 @@ def elaborate(self, platform):
m.submodules[f"func_unit_{n}"] = func_unit
m.submodules[f"wakeup_select_{n}"] = wakeup_select

m.submodules.collector = collector = Collector([func_unit.accept for func_unit, _ in self.func_units])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also complicates the Transactron network. It is probably connected with the observed IPC drop. Previously, when two results were ready in the same cycle, one was announced and the second was stored in a Forwarder for a cycle, which made the FU ready to process the next instruction. Now the FU has to stall until it can push out its result.


self.insert.proxy(m, self.rs.insert)
self.select.proxy(m, self.rs.select)
self.update.proxy(m, self.rs.update)
self.get_result.proxy(m, collector.method)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.get_result should be removed (+ in docstring too)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


return m

Expand All @@ -113,6 +108,10 @@ def get_module(self, gen_params: GenParams) -> FuncBlock:
rs_number=self.rs_number,
rs_type=self.rs_type,
)
dependencies = DependencyContext.get()
dependencies.add_dependency(AnnounceKey(), rs_unit.update)
for unit, _ in modules:
dependencies.add_dependency(FuncUnitResultKey(), unit.accept)
return rs_unit

def get_optypes(self) -> set[OpType]:
Expand Down
111 changes: 111 additions & 0 deletions coreblocks/func_blocks/fu/fpu/lza.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from amaranth import *
from amaranth.utils import ceil_log2
from transactron import TModule, Method, def_method
from coreblocks.func_blocks.fu.fpu.fpu_common import FPUParams
from transactron.utils.amaranth_ext import count_leading_zeros


class LZAMethodLayout:
    """Method layouts used by the LZA module.

    Parameters
    ----------
    fpu_params: FPUParams
        FPU parameters; only `sig_width` is read here.
    """

    def __init__(self, *, fpu_params: FPUParams):
        """
        Fields of the input layout:
        sig_a - significand of a
        sig_b - significand of b
        carry - selects whether the prediction is made for a+b or for a+b+1

        Fields of the output layout:
        shift_amount - shift distance needed to normalize the number
        is_zero - set when the result is zero
        """
        width = fpu_params.sig_width

        # Both operands share the significand width; the carry is a single bit.
        operand_fields = [(name, width) for name in ("sig_a", "sig_b")]
        self.predict_in_layout = operand_fields + [("carry", 1)]

        self.predict_out_layout = [
            ("shift_amount", range(width)),
            ("is_zero", 1),
        ]


class LZAModule(Elaboratable):
    """Leading Zero Anticipator (LZA) module.

    Based on: https://userpages.cs.umbc.edu/phatak/645/supl/lza/lza-survey-arith01.pdf

    After performing a subtraction, floating point numbers may have to be
    normalized, and for that the number of leading zeros has to be known.
    The most basic approach is to use an LZC (leading zero counter) after the
    subtraction; a more advanced approach is to use an LZA (Leading Zero
    Anticipator) to predict the number of leading zeros.

    This LZA module works under the assumptions that the significands are in
    two's complement and that, before complementation, sig_a was greater than
    or equal to sig_b. The LZA also works with error = 1: if 'n' is the result
    of the LZA module, then in reality the number may have to be shifted left
    by 'n' or by 'n+1' to be normalized. There are a few techniques for dealing
    with that error, such as specially designed shifters or predicting the
    error, but the most basic approach is to use a multiplexer after the
    shifter to perform one more shift left if necessary.

    Parameters
    ----------
    fpu_params: FPUParams
        FPU parameters; `sig_width` determines the width of the operands.

    Attributes
    ----------
    predict_request: Method
        Transactional method for initiating leading zeros prediction.
        Takes 'predict_in_layout' as argument.
        Returns shift amount as 'predict_out_layout'.
    """

    def __init__(self, *, fpu_params: FPUParams):
        self.lza_params = fpu_params
        self.method_layouts = LZAMethodLayout(fpu_params=self.lza_params)
        self.predict_request = Method(
            i=self.method_layouts.predict_in_layout,
            o=self.method_layouts.predict_out_layout,
        )

    def elaborate(self, platform):
        m = TModule()

        @def_method(m, self.predict_request)
        def _(sig_a, sig_b, carry):
            sig_width = self.lza_params.sig_width

            # The indicator vector is padded to the next power of two before
            # it is passed to count_leading_zeros.
            f_size = 2 ** ceil_log2(sig_width)
            filler_size = f_size - sig_width

            t = Signal(sig_width + 1)
            g = Signal(sig_width + 1)
            z = Signal(sig_width + 1)
            f = Signal(f_size)
            shift_amount = Signal(range(sig_width))
            is_zero = Signal(1)

            # All three vectors are shifted left by one; bit 0 of g and z is
            # set below when a carry is requested.
            m.d.av_comb += t.eq((sig_a ^ sig_b) << 1)
            m.d.av_comb += g.eq((sig_a & sig_b) << 1)
            m.d.av_comb += z.eq((sig_a | sig_b) << 1)
            with m.If(carry):
                m.d.av_comb += g[0].eq(1)
                m.d.av_comb += z[0].eq(1)

            # Indicator bits occupy the upper sig_width bits of f.
            for i in reversed(range(1, sig_width + 1)):
                m.d.av_comb += f[i + filler_size - 1].eq(t[i] ^ z[i - 1])

            # Force the filler (low) bits to ones. Only the filler slice is
            # assigned, so f is never read inside its own combinational
            # assignment and the indicator bits set above are not overridden.
            if filler_size > 0:
                lower_ones = Const((2**filler_size) - 1, filler_size)
                m.d.av_comb += f[:filler_size].eq(lower_ones)

            m.d.av_comb += shift_amount.eq(0)
            m.d.av_comb += shift_amount.eq(count_leading_zeros(f))

            # NOTE(review): the slice ends at sig_width - 1, so t[sig_width] is
            # not part of the check — confirm this bound is intended.
            m.d.av_comb += is_zero.eq(carry & t[1:sig_width].all())

            return {
                "shift_amount": shift_amount,
                "is_zero": is_zero,
            }

        return m
10 changes: 0 additions & 10 deletions coreblocks/func_blocks/interface/func_blocks_unifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from coreblocks.params import GenParams, BlockComponentParams
from transactron import TModule
from transactron.lib import MethodProduct, Collector

__all__ = ["FuncBlocksUnifier"]

Expand All @@ -18,19 +17,10 @@ def __init__(
):
self.rs_blocks = [(block.get_module(gen_params), block.get_optypes()) for block in blocks]

self.result_collector = Collector([block.get_result for block, _ in self.rs_blocks])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I see correctly, removing that Collector causes all of the FUs' get_result methods to be joined with the announcement methods (so with RS and RF), which makes scheduling more complex and the critical path longer. The Collector hides a Forwarder, which cuts the critical path on data.

self.get_result = self.result_collector.method

self.update_combiner = MethodProduct([block.update for block, _ in self.rs_blocks])
self.update = self.update_combiner.method

def elaborate(self, platform):
m = TModule()

for n, (unit, _) in enumerate(self.rs_blocks):
m.submodules[f"rs_block_{n}"] = unit

m.submodules["result_collector"] = self.result_collector
m.submodules["update_combiner"] = self.update_combiner

return m
1 change: 0 additions & 1 deletion coreblocks/func_blocks/interface/func_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,3 @@ class FuncBlock(HasElaborate, Protocol):
insert: Method
select: Method
update: Method
get_result: Method
12 changes: 11 additions & 1 deletion coreblocks/interface/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from transactron.lib.dependencies import SimpleKey, UnifierKey, ListKey
from transactron import Method
from transactron.lib import Collector
from transactron.lib import Collector, MethodProduct
from coreblocks.peripherals.bus_adapter import BusMasterInterface
from amaranth import Signal

Expand Down Expand Up @@ -52,6 +52,16 @@ class FetchResumeKey(UnifierKey, unifier=Collector):
pass


@dataclass(frozen=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should start adding docstrings to our keys? In practice they are global variables, and we haven't documented them...

class FuncUnitResultKey(UnifierKey, unifier=Collector):
    """Unifier key for methods that provide results of functional units.

    Methods registered under this key are combined using `Collector`.
    """

    pass


@dataclass(frozen=True)
class AnnounceKey(UnifierKey, unifier=MethodProduct):
    """Unifier key for methods that receive an announced register value.

    Methods registered under this key are combined using `MethodProduct`.
    """

    pass


@dataclass(frozen=True)
class ExceptionReportKey(SimpleKey[Method]):
    # Simple dependency key whose value is a single `Method`.
    # NOTE(review): presumably the method used to report exceptions — confirm against its users.
    pass
Expand Down
Loading
Loading