Skip to content

Commit

Permalink
Add E-core TMA3.6 perf json files
Browse files Browse the repository at this point in the history
  • Loading branch information
weilinwa committed May 31, 2024
1 parent 6891f3b commit d9c2faa
Show file tree
Hide file tree
Showing 23 changed files with 2,587 additions and 779 deletions.
678 changes: 293 additions & 385 deletions scripts/perf/alderlake/adl-metrics.json

Large diffs are not rendered by default.

66 changes: 64 additions & 2 deletions scripts/perf/alderlake/cache.json
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,22 @@
"UMask": "0x28",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 writebacks that access L2 cache",
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
"PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
"PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
"PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
"SampleAfterValue": "200003",
"UMask": "0x41",
"Unit": "cpu_atom"
Expand All @@ -304,7 +314,7 @@
"Counter": "0,1,2,3,4,5",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
"PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
"PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
"SampleAfterValue": "200003",
"UMask": "0x4f",
"Unit": "cpu_atom"
Expand Down Expand Up @@ -896,6 +906,17 @@
"UMask": "0x5",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of load uops retired that performed one or more locks.",
"Counter": "0,1,2,3,4,5",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
"PEBS": "1",
"SampleAfterValue": "200003",
"UMask": "0x21",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of retired split load uops.",
"Counter": "0,1,2,3,4,5",
Expand Down Expand Up @@ -1047,6 +1068,16 @@
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cacheable and noncacheable code read requests",
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
"PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand Data Read requests sent to uncore",
"Counter": "0,1,2,3",
Expand All @@ -1057,6 +1088,16 @@
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
"PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
"Counter": "0,1,2,3",
Expand All @@ -1079,6 +1120,17 @@
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
"PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.",
"Counter": "0,1,2,3",
Expand Down Expand Up @@ -1110,6 +1162,16 @@
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
"PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
"Counter": "0,1,2,3",
Expand Down
11 changes: 11 additions & 0 deletions scripts/perf/alderlake/frontend.json
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,17 @@
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "ICACHE_DATA.STALL_PERIODS",
"Counter": "0,1,2,3",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"Counter": "0,1,2,3",
Expand Down
14 changes: 14 additions & 0 deletions scripts/perf/alderlake/memory.json
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,20 @@
"UMask": "0x9",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
"Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
"PEBS": "2",
"PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Counter": "1,2,3,4,5,6,7",
Expand Down
10 changes: 6 additions & 4 deletions scripts/perf/alderlake/metricgroups.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,17 @@
"Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"IntVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
Expand Down Expand Up @@ -73,6 +76,7 @@
"TopdownL4": "Metrics for top-down breakdown at level 4",
"TopdownL5": "Metrics for top-down breakdown at level 5",
"TopdownL6": "Metrics for top-down breakdown at level 6",
"load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
"tma_L1_group": "Metrics for top-down breakdown at level 1",
"tma_L2_group": "Metrics for top-down breakdown at level 2",
"tma_L3_group": "Metrics for top-down breakdown at level 3",
Expand All @@ -81,10 +85,8 @@
"tma_L6_group": "Metrics for top-down breakdown at level 6",
"tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
"tma_assists_group": "Metrics contributing to tma_assists category",
"tma_backend_bound_aux_group": "Metrics contributing to tma_backend_bound_aux category",
"tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
"tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
"tma_base_group": "Metrics contributing to tma_base category",
"tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
"tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
"tma_core_bound_group": "Metrics contributing to tma_core_bound category",
Expand All @@ -97,6 +99,8 @@
"tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
"tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
"tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
"tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category",
"tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category",
"tma_int_operations_group": "Metrics contributing to tma_int_operations category",
"tma_issue2P": "Metrics related by the issue $issue2P",
"tma_issueBM": "Metrics related by the issue $issueBM",
Expand All @@ -123,11 +127,9 @@
"tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
"tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
"tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
"tma_mem_scheduler_group": "Metrics contributing to tma_mem_scheduler category",
"tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
"tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
"tma_mite_group": "Metrics contributing to tma_mite category",
"tma_nuke_group": "Metrics contributing to tma_nuke category",
"tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
"tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
"tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
Expand Down
9 changes: 9 additions & 0 deletions scripts/perf/alderlake/other.json
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,15 @@
"UMask": "0x7",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x75",
"EventName": "SERIALIZATION.C01_MS_SCB",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycles the uncore cannot take further requests",
"Counter": "0,1,2,3",
Expand Down
19 changes: 19 additions & 0 deletions scripts/perf/alderlake/pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -1765,6 +1765,15 @@
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of uops issued by the front end every cycle.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x0e",
"EventName": "UOPS_ISSUED.ANY",
"PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2. Uops_issued correlates to the number of ROB entries. If uop takes 2 ROB slots it counts as 2 uops_issued.",
"SampleAfterValue": "200003",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Uops that RAT issues to RS",
"Counter": "0,1,2,3,4,5,6,7",
Expand All @@ -1775,6 +1784,16 @@
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "UOPS_ISSUED.CYCLES",
"Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the total number of uops retired.",
"Counter": "0,1,2,3,4,5",
Expand Down
Loading

0 comments on commit d9c2faa

Please sign in to comment.