Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into tilk/metrics-for-st…
Browse files Browse the repository at this point in the history
…ruct-usage
  • Loading branch information
tilk committed Apr 1, 2024
2 parents a318903 + 6ef2f84 commit 5cfcdcc
Show file tree
Hide file tree
Showing 30 changed files with 1,007 additions and 386 deletions.
126 changes: 62 additions & 64 deletions coreblocks/cache/icache.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from coreblocks.interface.layouts import ICacheLayouts
from transactron.utils import assign, OneHotSwitchDynamic
from transactron.lib import *
from transactron.lib import logging
from coreblocks.peripherals.bus_adapter import BusMasterInterface

from coreblocks.cache.iface import CacheInterface, CacheRefillerInterface
Expand All @@ -21,19 +22,7 @@
"ICacheBypass",
]


def extract_instr_from_word(m: TModule, params: ICacheParameters, word: Signal, addr: Value):
    """Combinationally select the instruction-sized slice of a memory word.

    For a 32-bit word the whole word is the instruction; for a 64-bit word,
    address bit 2 picks the lower or upper 32-bit half. Adds the selection
    logic to ``m`` and returns the resulting signal.
    """
    width = len(word)
    if width not in (32, 64):
        raise RuntimeError("Word size different than 32 and 64 is not supported")

    instr = Signal(params.instr_width)
    if width == 32:
        m.d.comb += instr.eq(word)
    else:
        # 64-bit word: addr[2] selects which 4-byte half holds the instruction.
        with m.If(addr[2] == 0):
            m.d.comb += instr.eq(word[:32])  # lower 4 bytes
        with m.Else():
            m.d.comb += instr.eq(word[32:])  # upper 4 bytes
    return instr
log = logging.HardwareLogger("frontend.icache")


class ICacheBypass(Elaboratable, CacheInterface):
Expand All @@ -45,6 +34,9 @@ def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, bus_master:
self.accept_res = Method(o=layouts.accept_res)
self.flush = Method()

if params.words_in_fetch_block != 1:
raise ValueError("ICacheBypass only supports fetch block size equal to the word size.")

def elaborate(self, platform):
m = TModule()

Expand All @@ -63,7 +55,7 @@ def _(addr: Value) -> None:
def _():
res = self.bus_master.get_read_response(m)
return {
"instr": extract_instr_from_word(m, self.params, res.data, req_addr),
"fetch_block": res.data,
"error": res.err,
}

Expand All @@ -82,10 +74,10 @@ class ICache(Elaboratable, CacheInterface):
Refilling a cache line is abstracted away from this module. ICache module needs two methods
from the refiller `refiller_start`, which is called whenever we need to refill a cache line.
`refiller_accept` should be ready to be called whenever the refiller has another word ready
to be written to cache. `refiller_accept` should set `last` bit when either an error occurs
or the transfer is over. After issuing `last` bit, `refiller_accept` shouldn't be ready until
the next transfer is started.
`refiller_accept` should be ready to be called whenever the refiller has another fetch block
ready to be written to cache. `refiller_accept` should set `last` bit when either an error
occurs or the transfer is over. After issuing `last` bit, `refiller_accept` shouldn't be ready
until the next transfer is started.
"""

def __init__(self, layouts: ICacheLayouts, params: ICacheParameters, refiller: CacheRefillerInterface) -> None:
Expand Down Expand Up @@ -150,14 +142,13 @@ def elaborate(self, platform):
]

m.submodules.mem = self.mem = ICacheMemory(self.params)
m.submodules.req_fifo = self.req_fifo = FIFO(layout=self.addr_layout, depth=2)
m.submodules.res_fwd = self.res_fwd = Forwarder(layout=self.layouts.accept_res)
m.submodules.req_zipper = req_zipper = ArgumentsToResultsZipper(self.addr_layout, self.layouts.accept_res)

# State machine logic
needs_refill = Signal()
refill_finish = Signal()
refill_finish_last = Signal()
refill_error = Signal()
refill_error_saved = Signal()

flush_start = Signal()
flush_finish = Signal()
Expand All @@ -166,6 +157,7 @@ def elaborate(self, platform):
self.perf_flushes.incr(m, cond=flush_finish)

with m.FSM(reset="FLUSH") as fsm:

with m.State("FLUSH"):
with m.If(flush_finish):
m.next = "LOOKUP"
Expand All @@ -188,49 +180,56 @@ def elaborate(self, platform):
m.d.sync += way_selector.eq(way_selector.rotate_left(1))

# Fast path - read requests
request_valid = self.req_fifo.read.ready
request_addr = Signal(self.addr_layout)
mem_read_addr = Signal(self.addr_layout)
prev_mem_read_addr = Signal(self.addr_layout)
m.d.comb += assign(mem_read_addr, prev_mem_read_addr)

tag_hit = [tag_data.valid & (tag_data.tag == request_addr.tag) for tag_data in self.mem.tag_rd_data]
tag_hit_any = reduce(operator.or_, tag_hit)
mem_read_output_valid = Signal()
with Transaction(name="MemRead").body(
m, request=fsm.ongoing("LOOKUP") & (mem_read_output_valid | refill_error_saved)
):
req_addr = req_zipper.peek_arg(m)

mem_out = Signal(self.params.word_width)
for i in OneHotSwitchDynamic(m, Cat(tag_hit)):
m.d.comb += mem_out.eq(self.mem.data_rd_data[i])
tag_hit = [tag_data.valid & (tag_data.tag == req_addr.tag) for tag_data in self.mem.tag_rd_data]
tag_hit_any = reduce(operator.or_, tag_hit)

instr_out = extract_instr_from_word(m, self.params, mem_out, Value.cast(request_addr))
with m.If(tag_hit_any | refill_error_saved):
self.perf_hits.incr(m, cond=tag_hit_any)
mem_out = Signal(self.params.fetch_block_bytes * 8)
for i in OneHotSwitchDynamic(m, Cat(tag_hit)):
m.d.av_comb += mem_out.eq(self.mem.data_rd_data[i])

refill_error_saved = Signal()
m.d.comb += needs_refill.eq(request_valid & ~tag_hit_any & ~refill_error_saved)
req_zipper.write_results(m, fetch_block=mem_out, error=refill_error_saved)
m.d.sync += refill_error_saved.eq(0)
m.d.sync += mem_read_output_valid.eq(0)
with m.Else():
self.perf_misses.incr(m)

with Transaction().body(m, request=request_valid & fsm.ongoing("LOOKUP") & (tag_hit_any | refill_error_saved)):
self.perf_errors.incr(m, cond=refill_error_saved)
self.perf_misses.incr(m, cond=refill_finish_last)
self.perf_hits.incr(m, cond=~refill_finish_last)
m.d.comb += needs_refill.eq(1)

self.res_fwd.write(m, instr=instr_out, error=refill_error_saved)
m.d.sync += refill_error_saved.eq(0)
# Align to the beginning of the cache line
aligned_addr = self.serialize_addr(req_addr) & ~((1 << self.params.offset_bits) - 1)
log.debug(m, True, "Refilling line 0x{:x}", aligned_addr)
self.refiller.start_refill(m, addr=aligned_addr)

@def_method(m, self.accept_res)
def _():
self.req_fifo.read(m)
self.req_latency.stop(m)
return self.res_fwd.read(m)

mem_read_addr = Signal(self.addr_layout)
m.d.comb += assign(mem_read_addr, request_addr)
output = req_zipper.read(m)
return output.results

@def_method(m, self.issue_req, ready=accepting_requests)
def _(addr: Value) -> None:
self.perf_loads.incr(m)
self.req_latency.start(m)

deserialized = self.deserialize_addr(addr)
# Forward read address only if the method is called
m.d.comb += assign(mem_read_addr, deserialized)
m.d.sync += assign(request_addr, deserialized)
m.d.sync += assign(prev_mem_read_addr, deserialized)
req_zipper.write_args(m, deserialized)

self.req_fifo.write(m, deserialized)
m.d.sync += mem_read_output_valid.eq(1)

m.d.comb += [
self.mem.tag_rd_index.eq(mem_read_addr.index),
Expand All @@ -245,34 +244,30 @@ def _(addr: Value) -> None:

@def_method(m, self.flush, ready=accepting_requests)
def _() -> None:
log.info(m, True, "Flushing the cache...")
m.d.sync += flush_index.eq(0)
m.d.comb += flush_start.eq(1)

m.d.comb += flush_finish.eq(flush_index == self.params.num_of_sets - 1)

# Slow path - data refilling
with Transaction().body(m, request=fsm.ongoing("LOOKUP") & needs_refill):
# Align to the beginning of the cache line
aligned_addr = self.serialize_addr(request_addr) & ~((1 << self.params.offset_bits) - 1)
self.refiller.start_refill(m, addr=aligned_addr)

m.d.sync += refill_finish_last.eq(0)

with Transaction().body(m):
ret = self.refiller.accept_refill(m)
deserialized = self.deserialize_addr(ret.addr)

self.perf_errors.incr(m, cond=ret.error)

m.d.top_comb += [
self.mem.data_wr_addr.index.eq(deserialized["index"]),
self.mem.data_wr_addr.offset.eq(deserialized["offset"]),
self.mem.data_wr_data.eq(ret.data),
self.mem.data_wr_data.eq(ret.fetch_block),
]

m.d.comb += self.mem.data_wr_en.eq(1)
m.d.comb += refill_finish.eq(ret.last)
m.d.sync += refill_finish_last.eq(1)
m.d.comb += refill_error.eq(ret.error)
m.d.sync += refill_error_saved.eq(ret.error)
with m.If(ret.error):
m.d.sync += refill_error_saved.eq(1)

with m.If(fsm.ongoing("FLUSH")):
m.d.comb += [
Expand All @@ -285,9 +280,9 @@ def _() -> None:
with m.Else():
m.d.comb += [
self.mem.way_wr_en.eq(way_selector),
self.mem.tag_wr_index.eq(request_addr.index),
self.mem.tag_wr_index.eq(mem_read_addr.index),
self.mem.tag_wr_data.valid.eq(~refill_error),
self.mem.tag_wr_data.tag.eq(request_addr.tag),
self.mem.tag_wr_data.tag.eq(mem_read_addr.tag),
self.mem.tag_wr_en.eq(refill_finish),
]

Expand All @@ -301,7 +296,7 @@ class ICacheMemory(Elaboratable):
Writes are multiplexed using one-hot `way_wr_en` signal. Read data lines from all
ways are separately exposed (as an array).
The data memory is addressed using a machine word.
The data memory is addressed using fetch blocks.
"""

def __init__(self, params: ICacheParameters) -> None:
Expand All @@ -319,11 +314,13 @@ def __init__(self, params: ICacheParameters) -> None:

self.data_addr_layout = make_layout(("index", self.params.index_bits), ("offset", self.params.offset_bits))

self.fetch_block_bits = params.fetch_block_bytes * 8

self.data_rd_addr = Signal(self.data_addr_layout)
self.data_rd_data = Array([Signal(self.params.word_width) for _ in range(self.params.num_of_ways)])
self.data_rd_data = Array([Signal(self.fetch_block_bits) for _ in range(self.params.num_of_ways)])
self.data_wr_addr = Signal(self.data_addr_layout)
self.data_wr_en = Signal()
self.data_wr_data = Signal(self.params.word_width)
self.data_wr_data = Signal(self.fetch_block_bits)

def elaborate(self, platform):
m = TModule()
Expand All @@ -345,17 +342,18 @@ def elaborate(self, platform):
tag_mem_wp.en.eq(self.tag_wr_en & way_wr),
]

data_mem = Memory(width=self.params.word_width, depth=self.params.num_of_sets * self.params.words_in_block)
data_mem = Memory(
width=self.fetch_block_bits, depth=self.params.num_of_sets * self.params.fetch_blocks_in_line
)
data_mem_rp = data_mem.read_port()
data_mem_wp = data_mem.write_port()
m.submodules[f"data_mem_{i}_rp"] = data_mem_rp
m.submodules[f"data_mem_{i}_wp"] = data_mem_wp

# We address the data RAM using machine words, so we have to
# We address the data RAM using fetch blocks, so we have to
# discard a few least significant bits from the address.
redundant_offset_bits = exact_log2(self.params.word_width_bytes)
rd_addr = Cat(self.data_rd_addr.offset, self.data_rd_addr.index)[redundant_offset_bits:]
wr_addr = Cat(self.data_wr_addr.offset, self.data_wr_addr.index)[redundant_offset_bits:]
rd_addr = Cat(self.data_rd_addr.offset, self.data_rd_addr.index)[self.params.fetch_block_bytes_log :]
wr_addr = Cat(self.data_wr_addr.offset, self.data_wr_addr.index)[self.params.fetch_block_bytes_log :]

m.d.comb += [
self.data_rd_data[i].eq(data_mem_rp.data),
Expand Down
2 changes: 1 addition & 1 deletion coreblocks/cache/iface.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class CacheRefillerInterface(HasElaborate, Protocol):
start_refill : Method
A method that is used to start a refill for a given cache line.
accept_refill : Method
A method that is used to accept one word from the requested cache line.
A method that is used to accept one fetch block from the requested cache line.
"""

start_refill: Method
Expand Down
Loading

0 comments on commit 5cfcdcc

Please sign in to comment.