Skip to content

Commit

Permalink
Merge pull request #157 from OpenXiangShan/ruby-pf
Browse files Browse the repository at this point in the history
Ruby pf
  • Loading branch information
shinezyy authored Aug 15, 2024
2 parents 48c5acc + 7e50c7a commit 8f7a0f5
Show file tree
Hide file tree
Showing 59 changed files with 891 additions and 338 deletions.
12 changes: 0 additions & 12 deletions configs/common/CacheConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,17 +170,14 @@ def config_cache(options, system):
system.tol3bus = L2XBar(clk_domain=system.cpu_clk_domain, width=256)
system.l3.cpu_side = system.tol3bus.mem_side_ports
system.l3.mem_side = system.membus.cpu_side_ports
system.l3.max_cache_level = 3

for i in range(options.num_cpus):
if options.l3cache:
# l2 -> tol3bus -> l3
system.l2_caches[i].mem_side = system.tol3bus.cpu_side_ports
# l3 -> membus
system.l2_caches[i].max_cache_level = 3
else:
system.l2_caches[i].mem_side = system.membus.cpu_side_ports
system.l2_caches[i].max_cache_level = 2

if options.memchecker:
system.memchecker = MemChecker()
Expand All @@ -189,14 +186,7 @@ def config_cache(options, system):
if options.caches:
icache = icache_class(**_get_cache_opts('l1i', options))
dcache = dcache_class(**_get_cache_opts('l1d', options))
if options.l2cache:
icache.max_cache_level = 2
dcache.max_cache_level = 2
if options.l3cache:
icache.max_cache_level = 3
dcache.max_cache_level = 3
if dcache.prefetcher != NULL:
print("Add dtb for L1D prefetcher")
dcache.prefetcher.registerTLB(system.cpu[i].mmu.dtb)
if options.l1d_hwp_type == 'XSCompositePrefetcher':
if options.l1d_enable_spp:
Expand Down Expand Up @@ -233,15 +223,13 @@ def config_cache(options, system):
dcache.prefetcher.add_pf_downstream(system.l2_caches[i].prefetcher)
system.l2_caches[i].prefetcher.queue_size = 64
system.l2_caches[i].prefetcher.max_prefetch_requests_with_pending_translation = 128
print("Add L2 prefetcher {} as downstream of L1D prefetcher {}".format(i, i))

if options.l3cache and options.l2_to_l3_pf_hint:
assert system.l2_caches[i].prefetcher != NULL and \
system.l3.prefetcher != NULL
system.l2_caches[i].prefetcher.add_pf_downstream(system.l3.prefetcher)
system.l3.prefetcher.queue_size = 64
system.l3.prefetcher.max_prefetch_requests_with_pending_translation = 128
print("Add L3 prefetcher as downstream of L2 prefetcher {}".format(i))

# If we have a walker cache specified, instantiate two
# instances here
Expand Down
3 changes: 0 additions & 3 deletions configs/common/Caches.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,6 @@ class L2Cache(Cache):
mshrs = 64
tgts_per_mshr = 20
clusivity='mostly_incl'
prefetch_on_access = True
#prefetch_on_access = False
# always writeback clean when lower level is exclusive
writeback_clean = True

Expand All @@ -110,7 +108,6 @@ class L3Cache(Cache):
tgts_per_mshr = 20
clusivity='mostly_excl'
writeback_clean = False
prefetch_on_access = True

# aligned latency:
tag_latency = 2
Expand Down
52 changes: 52 additions & 0 deletions configs/common/PrefetcherConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import m5
from m5.objects import *
from common.Caches import *
from common import ObjectList


def _get_hwp(hwp_option):
    """Instantiate the hardware prefetcher selected by *hwp_option*.

    Args:
        hwp_option: Name of the prefetcher class to look up in
            ``ObjectList.hwp_list``, or ``None`` when no prefetcher
            was requested.

    Returns:
        ``NULL`` when *hwp_option* is ``None``; otherwise a new instance of
        the class returned by ``ObjectList.hwp_list.get(hwp_option)``,
        constructed with no arguments.
    """
    # Identity test: None is a singleton, so `is` is the correct comparison
    # and cannot be affected by a custom __eq__ on the option value.
    if hwp_option is None:
        return NULL

    hwp_class = ObjectList.hwp_list.get(hwp_option)
    return hwp_class()

def create_prefetcher(cpu, cache_level, options):
    """Build and configure the hardware prefetcher for one cache level.

    The prefetcher type is read from the ``<cache_level>_hwp_type``
    attribute of *options*. When that attribute is missing, or when it
    selects no prefetcher, ``NULL`` is returned.

    Args:
        cpu: CPU whose ``mmu.dtb`` is registered with the prefetcher, or
            ``NULL`` to skip TLB registration.
        cache_level: Prefix used to build the option name (the option read
            is ``'<cache_level>_hwp_type'``).
        options: Parsed option object. For ``XSCompositePrefetcher`` it must
            also provide ``l1d_enable_spp``, ``l1d_enable_cplx``,
            ``pht_pf_level`` and ``short_stride_thres``.

    Returns:
        The configured prefetcher object, or ``NULL``.
    """
    option_key = f'{cache_level}_hwp_type'
    if not hasattr(options, option_key):
        # No option for this level: nothing is created and nothing is logged.
        return NULL

    hwp_name = getattr(options, option_key)
    hwp = _get_hwp(hwp_name)
    print(f"create_prefetcher at {cache_level}: {hwp_name}")

    if hwp == NULL:
        return NULL

    if cpu != NULL:
        hwp.registerTLB(cpu.mmu.dtb)

    # Default queue size; overridden below for XSCompositePrefetcher.
    hwp.queue_size = 64

    if hwp_name != 'XSCompositePrefetcher':
        return hwp

    # --- XSCompositePrefetcher-specific tuning ---
    # Optional sub-prefetchers are only switched on, never explicitly off.
    # NOTE(review): the l1d_* options are consulted regardless of
    # cache_level -- confirm this is intended for non-L1D levels.
    if options.l1d_enable_spp:
        hwp.enable_spp = True
    if options.l1d_enable_cplx:
        hwp.enable_cplx = True

    hwp.pht_pf_level = options.pht_pf_level
    hwp.short_stride_thres = options.short_stride_thres
    hwp.fuzzy_stride_matching = False
    hwp.stream_pf_ahead = True

    large_bop = hwp.bop_large
    large_bop.delay_queue_enable = True
    large_bop.bad_score = 10

    small_bop = hwp.bop_small
    small_bop.delay_queue_enable = True
    small_bop.bad_score = 5

    hwp.queue_size = 128
    hwp.max_prefetch_requests_with_pending_translation = 128
    hwp.region_size = 64 * 16  # 64B * blocks per region

    berti = hwp.berti
    berti.use_byte_addr = True
    berti.aggressive_pf = False
    berti.trigger_pht = True

    return hwp
1 change: 0 additions & 1 deletion configs/common/cores/arm/O3_ARM_v7a.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ class O3_ARM_v7aL2(Cache):
size = '1MB'
assoc = 16
write_buffers = 8
prefetch_on_access = True
clusivity = 'mostly_excl'
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=8, latency = 1)
Expand Down
1 change: 0 additions & 1 deletion configs/common/cores/arm/ex5_LITTLE.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ class L2(Cache):
size = '512kB'
assoc = 8
write_buffers = 16
prefetch_on_access = True
clusivity = 'mostly_excl'
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=1, latency = 1)
Expand Down
1 change: 0 additions & 1 deletion configs/common/cores/arm/ex5_big.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ class L2(Cache):
size = '2MB'
assoc = 16
write_buffers = 8
prefetch_on_access = True
clusivity = 'mostly_excl'
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=8, latency = 1)
Expand Down
21 changes: 17 additions & 4 deletions configs/ruby/CHI.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,29 @@ def create_system(
# dataAccessLatency may be set to 0 if one wants to consider parallel
# data and tag lookups
class L1ICache(RubyCache):
level = 1
dataAccessLatency = 1
tagAccessLatency = 1
size = options.l1i_size
assoc = options.l1i_assoc

class L1DCache(RubyCache):
level = 1
dataAccessLatency = 0
tagAccessLatency = 1
size = options.l1d_size
assoc = options.l1d_assoc

class L2Cache(RubyCache):
dataAccessLatency = 6
level = 2
dataAccessLatency = 13
tagAccessLatency = 2
size = options.l2_size
assoc = options.l2_assoc

class HNFCache(RubyCache):
dataAccessLatency = 10
level = 3
dataAccessLatency = 17
tagAccessLatency = 2
size = options.l3_size
assoc = options.l3_assoc
Expand All @@ -154,11 +158,13 @@ class HNFCache(RubyCache):
L1ICache,
L1DCache,
system.cache_line_size.value,
options
)
for cpu in cpus
]

for rnf in ruby_system.rnf:
rnf.addPrivL2Cache(L2Cache)
rnf.addPrivL2Cache(L2Cache, options)
cpu_sequencers.extend(rnf.getSequencers())
all_cntrls.extend(rnf.getAllControllers())
network_nodes.append(rnf)
Expand Down Expand Up @@ -191,10 +197,17 @@ class HNFCache(RubyCache):
hnf_list = [i for i in range(options.num_l3caches)]
CHI_HNF.createAddrRanges(sysranges, system.cache_line_size.value, hnf_list)
ruby_system.hnf = [
CHI_HNF(i, ruby_system, HNFCache, None)
CHI_HNF(i, ruby_system, HNFCache, options, None)
for i in range(options.num_l3caches)
]

if options.l2_to_l3_pf_hint:
if len(ruby_system.hnf) > 1:
Warning("L2 to L3 prefetch hint is not supported with multiple HNFs")
else:
for rnf in ruby_system.rnf:
rnf.addLLCPrefetcherDownstream(ruby_system.hnf[0].getPrefetcher())

for hnf in ruby_system.hnf:
network_nodes.append(hnf)
network_cntrls.extend(hnf.getNetworkSideControllers())
Expand Down
Loading

0 comments on commit 8f7a0f5

Please sign in to comment.