add single-core spr model
plavin committed Nov 13, 2024
1 parent 0d899bb commit 85c50ff
Showing 1 changed file with 168 additions and 0 deletions.
168 changes: 168 additions & 0 deletions src/sst/elements/miranda/tests/miranda-spr.py
@@ -0,0 +1,168 @@
import sst
import sys
from sst import UnitAlgebra
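# Run this config through the SST core binary, e.g.: sst miranda-spr.py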

# Define SST core options
sst.setProgramOption("timebase", "1ps")

load_queue = 240
store_queue = 112
load_per_cycle = 3
store_per_cycle = 2
l1_ltu_latency = 5 # ~1.4ns (5 cycles @ turbo)
l1_latency = l1_ltu_latency - 2 # account for cycles of transfer to and from cache
protocol = "mesi"

freq_turbo = "3.5GHz"
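# At 3.5GHz one cycle is ~0.286ns, so the 5-cycle L1 load-to-use above works out to ~1.43ns.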

l1_cache_params = {
"cache_frequency" : freq_turbo,
"coherence_protocol" : "mesi",
"replacement_policy" : "lru",
"cache_size" : "48KiB",
"associativity" : 12,
"access_latency_cycles" : l1_latency, # Assume parallel tag/data lookup so no separate tag latency
"mshr_num_entries" : 16,
"maxRequestDelay" : 1000000000, # if a request is delayed for 1M cycles there's a problem
"events_up_per_cycle" : load_per_cycle + store_per_cycle, # Not perfect, could result in 4 loads
"mshr_latency_cycles" : 1, # Trivial at 16 entries, but still a guess
"L1" : 1,
}
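# 48KiB at 12-way matches the Golden Cove L1D described in the sources at the bottom of this file.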

l2_ltu_latency = 16 # ~5ns
l2_tag_latency = 2 # Guess
l2_latency = l2_ltu_latency - l1_ltu_latency - l2_tag_latency - 2
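# 16 - 5 - 2 - 2 = 7: the L2 load-to-use minus the L1 load-to-use, the tag read, and the transfer cycles.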

l2_cache_params = {
"cache_frequency" : freq_turbo,
"coherence_protocol" : protocol,
"replacement_policy" : "lru",
"cache_size" : "2MiB",
"associativity" : 16,
# access + tag latencies below should sum to l2_latency
"access_latency_cycles" : l2_latency - 2, # Assuming serial tag/data lookup, so split the tag cycles out of the total
"tag_access_latency_cycles": 2,
"mshr_num_entries" : 48,
"events_up_per_cycle" : 1,
"mshr_latency_cycles" : 1, # Trivial at 16 entries, but still a guess
}

l3_cache_params = {
"cache_frequency" : freq_turbo,
"coherence_protocol" : protocol,
"replacement_policy" : "random",
"cache_size" : "1875KiB",
"associativity" : 15,
"access_latency_cycles" : 26,
"tag_latency_cycles" : 4, # Guesss
"mshr_num_entries" : 72, # Guess, just doubled l2 to avoid most NACKs
"mshr_latency_cycles" : 4, # Guess
}
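# 1875KiB at 15-way models the ~1.875MB per-core LLC slice reported in the sources at the bottom of this file.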

## Memory - DDR5 @ 4800MT/s
mem_channels = 8
mem_capacity = UnitAlgebra("16GiB") # Per-channel (8 channels total)
mem_page_size = UnitAlgebra("4KiB")
mem_pages = mem_capacity * UnitAlgebra(mem_channels) / mem_page_size
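# 16GiB x 8 channels / 4KiB pages = 32Mi (33,554,432) pages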
ddr_clock = "4800MHz" # ddr5 4800
ddr_tCL = 40
ddr_tCWL = 39
ddr_tRCD = 39
ddr_tRP = 39

mem_timing_dram_params = {
"addrMapper" : "memHierarchy.roundRobinAddrMapper",
"clock" : ddr_clock,
"channels" : 3,
"channel.numRanks" : 2,
"channel.transaction_Q_size" : 32,
"channel.rank.numBanks" : 16,
"channel.rank.bank.CL" : ddr_tCL,
"channel.rank.bank.CL_WR" : ddr_tCWL,
"channel.rank.bank.RCD" : ddr_tRCD,
"channel.rank.bank.TRP" : ddr_tRP,
"channel.rank.bank.dataCycles" : 4, # Cycles to return data (4 if burst8)
"channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy",
"channel.rank.bank.transactionQ" : "memHierarchy.reorderTransactionQ",
"channel.rank.bank.pagePolicy.close" : 0,
"id" : 0,
"mem_size" : mem_capacity,
}

miranda_params_cpu = {
"printStats" : 1,
"clock" : freq_turbo,
"max_reqs_cycle" : load_per_cycle + store_per_cycle,
"max_reorder_lookups" : 256,
"maxmemreqpending" : load_queue + store_queue,
"pagesize" : int(mem_page_size),
"pagecount" : mem_pages
}
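# max_reqs_cycle = 3 loads + 2 stores = 5, and maxmemreqpending = 240 + 112 = 352 outstanding requests.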

# Define the simulation components
cpu = sst.Component("cpu", "miranda.BaseCPU")
cpu.addParams(miranda_params_cpu)

#gen = cpu.setSubComponent("generator", "miranda.SpatterBenchGenerator")
#gen.addParams({
# "verbose" : 2,
# "args" : " ".join(sys.argv[1:])
#})
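# (If re-enabled, the Spatter generator takes its arguments from this script's command line via sys.argv.)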

gen = cpu.setSubComponent("generator", "miranda.CopyGenerator")

# Tell SST what statistics handling we want
sst.setStatisticLoadLevel(4)

# Enable statistics outputs
cpu.enableAllStatistics({"type":"sst.AccumulatorStatistic"})

l1_cache = sst.Component("l1cache", "memHierarchy.Cache")
l1_cache.addParams(l1_cache_params)

l2_cache = sst.Component("l2cache", "memHierarchy.Cache")
l2_cache.addParams(l2_cache_params)

l3_cache = sst.Component("l3cache", "memHierarchy.Cache")
l3_cache.addParams(l3_cache_params)

memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.addParams({
"clock" : "1GHz",
"addr_range_end" : 4096 * 1024 * 1024 - 1
})
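# addr_range_end = 4096 * 1024 * 1024 - 1 = 2^32 - 1, i.e. this controller serves the first 4GiB of the address space.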
memory = memctrl.setSubComponent("backend", "memHierarchy.timingDRAM")
memory.addParams(mem_timing_dram_params)

# Define the simulation links
link_cpu_l1 = sst.Link("link_cpu_l1")
link_l1_l2 = sst.Link("link_l1_l2")
link_l2_l3 = sst.Link("link_l2_l3")
link_l3_mem = sst.Link("link_l3_mem")

link_cpu_l1.connect( (cpu, "cache_link", "100ps"),
(l1_cache, "high_network_0", "100ps") )

link_l1_l2.connect( (l1_cache, "low_network_0", "100ps"),
(l2_cache, "high_network_0", "100ps") )

link_l2_l3.connect( (l2_cache, "low_network_0", "100ps"),
(l3_cache, "high_network_0", "100ps") )

link_l3_mem.connect( (l3_cache, "low_network_0", "100ps"),
(memctrl, "direct_link", "100ps") )

sst.setStatisticOutput("sst.statOutputCSV")
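# The CSV output lands in StatisticOutput.csv by default; if another path is wanted,
# sst.setStatisticOutputOptions({"filepath" : "miranda-spr-stats.csv"}) should redirect it (filename here is illustrative).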

################### Sources ###################
# This config is compiled from various sources including
# - Anandtech: Intel Xeon Sapphire Rapids: How To Go Monolithic with Tiles
# - Anandtech: Intel Architecture Day 2021: Alder Lake, Golden Cove, and Gracemont Detailed
# - wccftech: Intel Sapphire Rapids-SP Xeon CPU Lineup Detailed: Platinum & HBM Variants with Over 350W TDP, C740 Chipset Compatibility (2022-05-15)
# https://wccftech.com/intel-sapphire-rapids-sp-xeon-amd-epyc-7773x-milan-x-cpu-cache-memory-benchmarks-leak/
# - Wikipedia (accessed 7/1/2022 and 4/11/2024)
# - LANL arXiv paper: arxiv.org/pdf/2211.05712.pdf
# - https://wccftech.com/intel-4th-gen-xeon-cpus-official-sapphire-rapids-up-to-60-cores-8-socket-scalability-350w-tdp-17000-usd/
# - https://chipsandcheese.com/2023/03/12/a-peek-at-sapphire-rapids/
# - https://www.ixpug.org/images/docs/ISC23/McCalpin_SPR_BW_limits_2023-05-24_final.pdf
###############################################
