diff --git a/scripts/perf/alderlake/adl-metrics.json b/scripts/perf/alderlake/adl-metrics.json index 8b0417e8..3147ceda 100644 --- a/scripts/perf/alderlake/adl-metrics.json +++ b/scripts/perf/alderlake/adl-metrics.json @@ -113,42 +113,30 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_alloc_restriction", - "MetricThreshold": "tma_alloc_restriction > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions", + "MetricExpr": "tma_core_bound", + "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_allocation_restriction", + "MetricThreshold": "tma_allocation_restriction > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", "MetricgroupNoGroup": "TopdownL1;Default", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", - "ScaleUnit": "100%", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", - "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "tma_backend_bound", - "MetricGroup": "Default;TopdownL1;tma_L1_group", - "MetricName": "tma_backend_bound_aux", - "MetricThreshold": "tma_backend_bound_aux > 0.2", - "MetricgroupNoGroup": "TopdownL1;Default", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "(tma_info_core_slots - (cpu_atom@TOPDOWN_FE_BOUND.ALL@ + cpu_atom@TOPDOWN_BE_BOUND.ALL@ + cpu_atom@TOPDOWN_RETIRING.ALL@)) / tma_info_core_slots", + "MetricExpr": "(5 * cpu_atom@CPU_CLK_UNHALTED.CORE@ - (cpu_atom@TOPDOWN_FE_BOUND.ALL@ + cpu_atom@TOPDOWN_BE_BOUND.ALL@ + cpu_atom@TOPDOWN_RETIRING.ALL@)) / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", @@ -158,28 +146,18 @@ "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of uops that are not from the microsequencer.", - "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@) / tma_info_core_slots", - "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", - "MetricName": "tma_base", - "MetricThreshold": "tma_base > 0.6 & tma_retiring > 0.75", - "MetricgroupNoGroup": "TopdownL2", - "ScaleUnit": "100%", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", "MetricName": "tma_branch_detect", - "MetricThreshold": "tma_branch_detect > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "MetricThreshold": "tma_branch_detect > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", - "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05 & tma_bad_speculation > 0.15", @@ -188,26 +166,26 @@ "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", "MetricName": "tma_branch_resteer", - "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_cisc", - "MetricThreshold": "tma_cisc > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_cisc > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls.", - "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)", + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are bounded by core restrictions and not attributed to an outstanding load or stores, or resource limitation", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.1", @@ -216,586 +194,516 @@ "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_decode", - "MetricThreshold": "tma_decode > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_decode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation.", - "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.DISAMBIGUATION@ / cpu_atom@MACHINE_CLEARS.SLOW@)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_disambiguation", - "MetricThreshold": "tma_disambiguation > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that does not require the use of microcode, classified as a fast nuke, due to memory ordering, memory disambiguation and memory renaming", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_dram_bound", - "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "Default;TopdownL1;tma_L1_group", + "MetricName": "tma_frontend_bound", + "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1;Default", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", - "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", - "MetricName": "tma_fast_nuke", - "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_icache_misses", + "MetricThreshold": "tma_icache_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", - "MetricName": "tma_fetch_bandwidth", - "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", + "MetricName": "tma_ifetch_bandwidth", + "MetricThreshold": "tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", - "MetricName": "tma_fetch_latency", - "MetricThreshold": "tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2", + "MetricName": "tma_ifetch_latency", + "MetricThreshold": "tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2", "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists.", - "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.FP_ASSIST@ / cpu_atom@MACHINE_CLEARS.SLOW@)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_fp_assist", - "MetricThreshold": "tma_fp_assist > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "100 * (cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ + cpu_atom@LD_HEAD.PGWALK_AT_RET@) / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricName": "tma_info_bottleneck_%_dtlb_miss_bound_cycles", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of floating point divide operations per uop.", - "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", - "MetricName": "tma_fpdiv_uops", - "MetricThreshold": "tma_fpdiv_uops > 0.2 & (tma_base > 0.6 & tma_retiring > 0.75)", - "ScaleUnit": "100%", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", - "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots", - "MetricGroup": "Default;TopdownL1;tma_L1_group", - "MetricName": "tma_frontend_bound", - "MetricThreshold": "tma_frontend_bound > 0.2", - "MetricgroupNoGroup": "TopdownL1;Default", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "Ifetch", + "MetricName": "tma_info_bottleneck_%_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", - "MetricName": "tma_icache_misses", - "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.LOAD@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "Load_Store_Miss", + "MetricName": "tma_info_bottleneck_%_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound", "Unit": "cpu_atom" }, { - "BriefDescription": "", - "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@", - "MetricName": "tma_info_core_clks", + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "100 * cpu_atom@LD_HEAD.ANY_AT_RET@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "Mem_Exec", + "MetricName": "tma_info_bottleneck_%_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound", "Unit": "cpu_atom" }, { - "BriefDescription": "", - "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE_P@", - "MetricName": "tma_info_core_clks_p", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipbranch", "Unit": "cpu_atom" }, { - "BriefDescription": "Cycles Per Instruction", - "MetricExpr": "tma_info_core_clks / INST_RETIRED.ANY", - "MetricName": "tma_info_core_cpi", + "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", + "MetricName": "tma_info_br_inst_mix_ipcall", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions Per Cycle", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks", - "MetricName": "tma_info_core_ipc", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@BR_INST_RETIRED.FAR_BRANCH@u", + "MetricName": "tma_info_br_inst_mix_ipfarbranch", "Unit": "cpu_atom" }, { - "BriefDescription": "", - "MetricExpr": "5 * tma_info_core_clks", - "MetricName": "tma_info_core_slots", + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_ntaken", "Unit": "cpu_atom" }, { - "BriefDescription": "Uops Per Instruction", - "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", - "MetricName": "tma_info_core_upi", + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_taken", "Unit": "cpu_atom" }, { - "BriefDescription": "Percent of instruction miss cost that hit in DRAM", - "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", - "MetricName": "tma_info_frontend_inst_miss_cost_dramhit_percent", + "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", + "MetricName": "tma_info_br_inst_mix_ipmisp_indirect", "Unit": "cpu_atom" }, { - "BriefDescription": "Percent of instruction miss cost that hit in the L2", - "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", - "MetricName": "tma_info_frontend_inst_miss_cost_l2hit_percent", + "BriefDescription": "Instructions per retired return Branch Misprediction", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", + "MetricName": "tma_info_br_inst_mix_ipmisp_ret", "Unit": "cpu_atom" }, { - "BriefDescription": "Percent of instruction miss cost that hit in the L3", - "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@", - "MetricName": "tma_info_frontend_inst_miss_cost_l3hit_percent", + "BriefDescription": "Instructions per retired Branch Misprediction", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipmispredict", "Unit": "cpu_atom" }, { "BriefDescription": "Ratio of all branches which mispredict", "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_branch_mispredict_ratio", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_ratio", "Unit": "cpu_atom" }, { "BriefDescription": "Ratio between Mispredicted branches and unknown branches", "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY", - "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_to_unknown_branch_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of all uops which are FPDiv uops", - "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.FPDIV@ / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_fpdiv_uop_ratio", + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.LD_BUF@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricName": "tma_info_buffer_stalls_%_load_buffer_stall_cycles", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of all uops which are IDiv uops", - "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.IDIV@ / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_idiv_uop_ratio", + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.RSV@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricName": "tma_info_buffer_stalls_%_mem_rsv_stall_cycles", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_ipbranch", + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "100 * cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricName": "tma_info_buffer_stalls_%_store_buffer_stall_cycles", "Unit": "cpu_atom" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", - "MetricName": "tma_info_inst_mix_ipcall", + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@ / INST_RETIRED.ANY", + "MetricName": "tma_info_core_cpi", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per Far Branch", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", - "MetricName": "tma_info_inst_mix_ipfarbranch", + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricName": "tma_info_core_ipc", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per Load", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_inst_mix_ipload", + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", + "MetricName": "tma_info_core_upi", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", - "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken", + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_L2_HIT@ / MEM_BOUND_STALLS.IFETCH", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l2hit", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", - "MetricName": "tma_info_inst_mix_ipmisp_cond_taken", + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_LLC_HIT@ / MEM_BOUND_STALLS.IFETCH", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3hit", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", - "MetricName": "tma_info_inst_mix_ipmisp_indirect", + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_DRAM_HIT@ / MEM_BOUND_STALLS.IFETCH", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3miss", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per retired return Branch Misprediction", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", - "MetricName": "tma_info_inst_mix_ipmisp_ret", + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l2hit", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per retired Branch Misprediction", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_ipmispredict", + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3hit", "Unit": "cpu_atom" }, { - "BriefDescription": "Instructions per Store", - "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", - "MetricName": "tma_info_inst_mix_ipstore", + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3", + "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3miss", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of all uops which are ucode ops", - "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.MS@ / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_microcode_uop_ratio", + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block", + "MetricExpr": "100 * cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_l1_bound", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of all uops which are x87 uops", - "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.X87@ / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_x87_uop_ratio", + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement", + "MetricExpr": "100 * (cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ + cpu_atom@MEM_BOUND_STALLS.LOAD@) / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_load_bound", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block", - "MetricExpr": "100 * cpu_atom@LD_BLOCKS.4K_ALIAS@ / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_address_alias_blocks", + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full", + "MetricExpr": "100 * (cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@MEM_SCHEDULER_BLOCK.ALL@) * tma_mem_scheduler", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_store_bound", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of total non-speculative loads that are splits", - "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.SPLIT_LOADS@ / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_load_splits", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory disambiguation", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.DISAMBIGUATION@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_disamb_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", - "MetricExpr": "100 * cpu_atom@LD_BLOCKS.DATA_UNKNOWN@ / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_store_fwd_blocks", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.FP_ASSIST@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_fp_assist_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "Cycle cost per DRAM hit", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory ordering", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.MEMORY_ORDERING@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_monuke_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "Cycle cost per L2 hit", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory renaming", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.MRN_NUKE@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_mrn_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "Cycle cost per LLC hit", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to page faults", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.PAGE_FAULT@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_page_fault_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "load ops retired per 1000 instruction", - "MetricExpr": "1e3 * cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@ / INST_RETIRED.ANY", - "MetricName": "tma_info_memory_memloadpki", + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code", + "MetricExpr": "1e3 * cpu_atom@MACHINE_CLEARS.SMC@ / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_smc_pki", "Unit": "cpu_atom" }, { - "BriefDescription": "Average CPU Utilization", - "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", - "MetricName": "tma_info_system_cpu_utilization", + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "100 * cpu_atom@LD_BLOCKS.4K_ALIAS@ / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_adressaliasing", "Unit": "cpu_atom" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE", - "MetricGroup": "Summary", - "MetricName": "tma_info_system_kernel_utilization", + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * cpu_atom@LD_BLOCKS.DATA_UNKNOWN@ / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_storefwdblk", "Unit": "cpu_atom" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", - "MetricExpr": "tma_info_core_clks / CPU_CLK_UNHALTED.REF_TSC", - "MetricGroup": "Power", - "MetricName": "tma_info_system_turbo_utilization", + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "100 * cpu_atom@LD_HEAD.L1_MISS_AT_RET@ / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_l1miss", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", - "MetricName": "tma_itlb_misses", - "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "100 * cpu_atom@LD_HEAD.OTHER_AT_RET@ / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_otherpipelineblks", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", - "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l1_bound", - "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "100 * cpu_atom@LD_HEAD.PGWALK_AT_RET@ / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_pagewalk", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l2_bound", - "MetricThreshold": "tma_l2_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "100 * cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_stlbhit", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l3_bound", - "MetricThreshold": "tma_l3_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "100 * cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_storefwding", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full", - "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.LD_BUF@ / MEM_SCHEDULER_BLOCK.ALL", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_ld_buffer", - "MetricThreshold": "tma_ld_buffer > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", - "ScaleUnit": "100%", + "BriefDescription": "Instructions per Load", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_ipload", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots", - "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", - "MetricName": "tma_machine_clears", - "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15", - "MetricgroupNoGroup": "TopdownL2", - "ScaleUnit": "100%", + "BriefDescription": "Instructions per Store", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "tma_info_mem_mix_ipstore", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_mem_scheduler", - "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of total non-speculative loads that perform one or more locks", + "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.LOCK_LOADS@ / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_locks_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", - "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", - "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", - "MetricName": "tma_memory_bound", - "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.1", - "MetricgroupNoGroup": "TopdownL2", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.SPLIT_LOADS@ / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_splits_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering.", - "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.MEMORY_ORDERING@ / cpu_atom@MACHINE_CLEARS.SLOW@)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_memory_ordering", - "MetricThreshold": "tma_memory_ordering > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%", + "BriefDescription": "Ratio of mem load uops to all uops", + "MetricExpr": "1e3 * cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@ / UOPS_RETIRED.ALL", + "MetricName": "tma_info_mem_mix_memload_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", - "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots", - "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", - "MetricName": "tma_ms_uops", - "MetricThreshold": "tma_ms_uops > 0.05 & tma_retiring > 0.75", - "MetricgroupNoGroup": "TopdownL2", - "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "100 * cpu_atom@SERIALIZATION.C01_MS_SCB@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricName": "tma_info_serialization _%_tpause_cycles", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_non_mem_scheduler", - "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", - "ScaleUnit": "100%", + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", + "MetricName": "tma_info_system_cpu_utilization", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", - "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", - "MetricName": "tma_nuke", - "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", - "ScaleUnit": "100%", + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE_P@k / cpu_atom@CPU_CLK_UNHALTED.CORE@", + "MetricGroup": "Summary", + "MetricName": "tma_info_system_kernel_utilization", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", - "MetricName": "tma_other_fb", - "MetricThreshold": "tma_other_fb > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@ / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "tma_info_system_turbo_utilization", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", - "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_other_l1", - "MetricThreshold": "tma_other_l1 > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.FPDIV@ / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_fpdiv_uop_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.", - "MetricExpr": "max(0, tma_memory_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_other_load_store", - "MetricThreshold": "tma_other_load_store > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.IDIV@ / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_idiv_uop_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.", - "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@ - cpu_atom@UOPS_RETIRED.FPDIV@) / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", - "MetricName": "tma_other_ret", - "MetricThreshold": "tma_other_ret > 0.3 & (tma_base > 0.6 & tma_retiring > 0.75)", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of all uops which are microcode ops", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.MS@ / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_microcode_uop_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults.", - "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.PAGE_FAULT@ / cpu_atom@MACHINE_CLEARS.SLOW@)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_page_fault", - "MetricThreshold": "tma_page_fault > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%", + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.X87@ / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_x87_uop_ratio", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", - "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", - "MetricName": "tma_predecode", - "MetricThreshold": "tma_predecode > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_itlb_misses", + "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_register", - "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_reorder_buffer", - "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", - "ScaleUnit": "100%", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", - "MetricExpr": "tma_backend_bound", - "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group", - "MetricName": "tma_resource_bound", - "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2", - "MetricgroupNoGroup": "TopdownL2", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.", + "MetricName": "tma_mem_scheduler", + "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that result in retirement slots.", - "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots", - "MetricGroup": "Default;TopdownL1;tma_L1_group", - "MetricName": "tma_retiring", - "MetricThreshold": "tma_retiring > 0.75", - "MetricgroupNoGroup": "TopdownL1;Default", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative", - "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.RSV@ / MEM_SCHEDULER_BLOCK.ALL", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_rsv", - "MetricThreshold": "tma_rsv > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that requires the use of microcode (slow nuke)", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_nuke", + "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_serialization", - "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_other_fb", + "MetricThreshold": "tma_other_fb > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC.", - "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.SMC@ / cpu_atom@MACHINE_CLEARS.SLOW@)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_smc", - "MetricThreshold": "tma_smc > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_predecode", + "MetricThreshold": "tma_predecode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full", - "MetricExpr": "tma_store_bound", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_st_buffer", - "MetricThreshold": "tma_st_buffer > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls)", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_register", + "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", - "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_stlb_hit", - "MetricThreshold": "tma_stlb_hit > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls)", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", - "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_stlb_miss", - "MetricThreshold": "tma_stlb_miss > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", + "BriefDescription": "Counts the number of cycles the core is stalled due to a resource limitation", + "MetricExpr": "tma_backend_bound - tma_core_bound", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_resource_bound", + "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", - "MetricExpr": "tma_mem_scheduler * (cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@MEM_SCHEDULER_BLOCK.ALL@)", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_store_bound", - "MetricThreshold": "tma_store_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", + "BriefDescription": "Counts the number of issue slots that result in retirement slots", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "Default;TopdownL1;tma_L1_group", + "MetricName": "tma_retiring", + "MetricThreshold": "tma_retiring > 0.75", + "MetricgroupNoGroup": "TopdownL1;Default", "ScaleUnit": "100%", "Unit": "cpu_atom" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_store_fwd_blk", - "MetricThreshold": "tma_store_fwd_blk > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS)", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / (5 * cpu_atom@CPU_CLK_UNHALTED.CORE@)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_serialization", + "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%", "Unit": "cpu_atom" }, diff --git a/scripts/perf/alderlake/cache.json b/scripts/perf/alderlake/cache.json index 922940ab..3f51686f 100644 --- a/scripts/perf/alderlake/cache.json +++ b/scripts/perf/alderlake/cache.json @@ -279,12 +279,22 @@ "UMask": "0x28", "Unit": "cpu_core" }, + { + "BriefDescription": "L2 writebacks that access L2 cache", + "Counter": "0,1,2,3", + "EventCode": "0x23", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x40", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.MISS", - "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", "UMask": "0x41", "Unit": "cpu_atom" @@ -304,7 +314,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", "UMask": "0x4f", "Unit": "cpu_atom" @@ -896,6 +906,17 @@ "UMask": "0x5", "Unit": "cpu_atom" }, + { + "BriefDescription": "Counts the number of load uops retired that performed one or more locks.", + "Counter": "0,1,2,3,4,5", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x21", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts the number of retired split load uops.", "Counter": "0,1,2,3,4,5", @@ -1047,6 +1068,16 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Cacheable and noncacheable code read requests", + "Counter": "0,1,2,3", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Demand Data Read requests sent to uncore", "Counter": "0,1,2,3", @@ -1057,6 +1088,16 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "Counter": "0,1,2,3", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", "Counter": "0,1,2,3", @@ -1079,6 +1120,17 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.", "Counter": "0,1,2,3", @@ -1110,6 +1162,16 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "Counter": "0,1,2,3", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.", "Counter": "0,1,2,3", diff --git a/scripts/perf/alderlake/frontend.json b/scripts/perf/alderlake/frontend.json index 29ebcf51..66735a61 100644 --- a/scripts/perf/alderlake/frontend.json +++ b/scripts/perf/alderlake/frontend.json @@ -323,6 +323,17 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "ICACHE_DATA.STALL_PERIODS", + "Counter": "0,1,2,3", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x80", + "EventName": "ICACHE_DATA.STALL_PERIODS", + "SampleAfterValue": "500009", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "Counter": "0,1,2,3", diff --git a/scripts/perf/alderlake/memory.json b/scripts/perf/alderlake/memory.json index 81b450a8..81a03f53 100644 --- a/scripts/perf/alderlake/memory.json +++ b/scripts/perf/alderlake/memory.json @@ -125,6 +125,20 @@ "UMask": "0x9", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.", + "Counter": "1,2,3,4,5,6,7", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "53", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "1,2,3,4,5,6,7", diff --git a/scripts/perf/alderlake/metricgroups.json b/scripts/perf/alderlake/metricgroups.json index 05ce41fd..b54a5fc0 100644 --- a/scripts/perf/alderlake/metricgroups.json +++ b/scripts/perf/alderlake/metricgroups.json @@ -35,14 +35,17 @@ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "IntVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", @@ -73,6 +76,7 @@ "TopdownL4": "Metrics for top-down breakdown at level 4", "TopdownL5": "Metrics for top-down breakdown at level 5", "TopdownL6": "Metrics for top-down breakdown at level 6", + "load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "tma_L1_group": "Metrics for top-down breakdown at level 1", "tma_L2_group": "Metrics for top-down breakdown at level 2", "tma_L3_group": "Metrics for top-down breakdown at level 3", @@ -81,10 +85,8 @@ "tma_L6_group": "Metrics for top-down breakdown at level 6", "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category", "tma_assists_group": "Metrics contributing to tma_assists category", - "tma_backend_bound_aux_group": "Metrics contributing to tma_backend_bound_aux category", "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category", "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category", - "tma_base_group": "Metrics contributing to tma_base category", "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category", "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category", "tma_core_bound_group": "Metrics contributing to tma_core_bound category", @@ -97,6 +99,8 @@ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category", "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category", "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category", + "tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category", + "tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category", "tma_int_operations_group": "Metrics contributing to tma_int_operations category", "tma_issue2P": "Metrics related by the issue $issue2P", "tma_issueBM": "Metrics related by the issue $issueBM", @@ -123,11 +127,9 @@ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category", "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category", "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category", - "tma_mem_scheduler_group": "Metrics contributing to tma_mem_scheduler category", "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category", "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category", "tma_mite_group": "Metrics contributing to tma_mite category", - "tma_nuke_group": "Metrics contributing to tma_nuke category", "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category", "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category", "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category", diff --git a/scripts/perf/alderlake/other.json b/scripts/perf/alderlake/other.json index 53c08094..f95e093f 100644 --- a/scripts/perf/alderlake/other.json +++ b/scripts/perf/alderlake/other.json @@ -198,6 +198,15 @@ "UMask": "0x7", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)", + "Counter": "0,1,2,3,4,5", + "EventCode": "0x75", + "EventName": "SERIALIZATION.C01_MS_SCB", + "SampleAfterValue": "200003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles the uncore cannot take further requests", "Counter": "0,1,2,3", diff --git a/scripts/perf/alderlake/pipeline.json b/scripts/perf/alderlake/pipeline.json index 76eeff08..b7656f77 100644 --- a/scripts/perf/alderlake/pipeline.json +++ b/scripts/perf/alderlake/pipeline.json @@ -1765,6 +1765,15 @@ "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of uops issued by the front end every cycle.", + "Counter": "0,1,2,3,4,5", + "EventCode": "0x0e", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2. Uops_issued correlates to the number of ROB entries. If uop takes 2 ROB slots it counts as 2 uops_issued.", + "SampleAfterValue": "200003", + "Unit": "cpu_atom" + }, { "BriefDescription": "Uops that RAT issues to RS", "Counter": "0,1,2,3,4,5,6,7", @@ -1775,6 +1784,16 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "UOPS_ISSUED.CYCLES", + "Counter": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xae", + "EventName": "UOPS_ISSUED.CYCLES", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the total number of uops retired.", "Counter": "0,1,2,3,4,5", diff --git a/scripts/perf/alderlaken/adln-metrics.json b/scripts/perf/alderlaken/adln-metrics.json index 4c753143..447596f9 100644 --- a/scripts/perf/alderlaken/adln-metrics.json +++ b/scripts/perf/alderlaken/adln-metrics.json @@ -85,39 +85,28 @@ "ScaleUnit": "1SMI#" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", - "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", - "MetricName": "tma_alloc_restriction", - "MetricThreshold": "tma_alloc_restriction > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions", + "MetricExpr": "tma_core_bound", + "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_allocation_restriction", + "MetricThreshold": "tma_allocation_restriction > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", "MetricgroupNoGroup": "TopdownL1;Default", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", - "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "tma_backend_bound", - "MetricGroup": "Default;TopdownL1;tma_L1_group", - "MetricName": "tma_backend_bound_aux", - "MetricThreshold": "tma_backend_bound_aux > 0.2", - "MetricgroupNoGroup": "TopdownL1;Default", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count", "ScaleUnit": "100%" }, { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "(tma_info_core_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_core_slots", + "MetricExpr": "(5 * CPU_CLK_UNHALTED.CORE - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", @@ -126,26 +115,17 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of uops that are not from the microsequencer.", - "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_core_slots", - "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", - "MetricName": "tma_base", - "MetricThreshold": "tma_base > 0.6 & tma_retiring > 0.75", - "MetricgroupNoGroup": "TopdownL2", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", "MetricName": "tma_branch_detect", - "MetricThreshold": "tma_branch_detect > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "MetricThreshold": "tma_branch_detect > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05 & tma_bad_speculation > 0.15", @@ -153,24 +133,24 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", "MetricName": "tma_branch_resteer", - "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_cisc", - "MetricThreshold": "tma_cisc > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_cisc > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls.", - "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)", + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are bounded by core restrictions and not attributed to an outstanding load or stores, or resource limitation", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.1", @@ -178,248 +158,308 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", - "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_decode", - "MetricThreshold": "tma_decode > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_decode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_disambiguation", - "MetricThreshold": "tma_disambiguation > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that does not require the use of microcode, classified as a fast nuke, due to memory ordering, memory disambiguation and memory renaming", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_dram_bound", - "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "DefaultMetricgroupName": "TopdownL1", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "Default;TopdownL1;tma_L1_group", + "MetricName": "tma_frontend_bound", + "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1;Default", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", - "MetricName": "tma_fast_nuke", - "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_icache_misses", + "MetricThreshold": "tma_icache_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", - "MetricName": "tma_fetch_bandwidth", - "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", + "MetricName": "tma_ifetch_bandwidth", + "MetricThreshold": "tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", - "MetricName": "tma_fetch_latency", - "MetricThreshold": "tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2", + "MetricName": "tma_ifetch_latency", + "MetricThreshold": "tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2", "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_fp_assist", - "MetricThreshold": "tma_fp_assist > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "100 * (LD_HEAD.DTLB_MISS_AT_RET + LD_HEAD.PGWALK_AT_RET) / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_bottleneck_%_dtlb_miss_bound_cycles" }, { - "BriefDescription": "Counts the number of floating point divide operations per uop.", - "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", - "MetricName": "tma_fpdiv_uops", - "MetricThreshold": "tma_fpdiv_uops > 0.2 & (tma_base > 0.6 & tma_retiring > 0.75)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Ifetch", + "MetricName": "tma_info_bottleneck_%_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", - "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots", - "MetricGroup": "Default;TopdownL1;tma_L1_group", - "MetricName": "tma_frontend_bound", - "MetricThreshold": "tma_frontend_bound > 0.2", - "MetricgroupNoGroup": "TopdownL1;Default", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "100 * MEM_BOUND_STALLS.LOAD / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Load_Store_Miss", + "MetricName": "tma_info_bottleneck_%_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", - "MetricName": "tma_icache_misses", - "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "100 * LD_HEAD.ANY_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Mem_Exec", + "MetricName": "tma_info_bottleneck_%_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound" }, { - "BriefDescription": "", - "MetricExpr": "CPU_CLK_UNHALTED.CORE", - "MetricName": "tma_info_core_clks" + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipbranch" }, { - "BriefDescription": "", - "MetricExpr": "CPU_CLK_UNHALTED.CORE_P", - "MetricName": "tma_info_core_clks_p" + "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricName": "tma_info_br_inst_mix_ipcall" + }, + { + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", + "MetricName": "tma_info_br_inst_mix_ipfarbranch" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", + "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_ntaken" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_taken" + }, + { + "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricName": "tma_info_br_inst_mix_ipmisp_indirect" + }, + { + "BriefDescription": "Instructions per retired return Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", + "MetricName": "tma_info_br_inst_mix_ipmisp_ret" + }, + { + "BriefDescription": "Instructions per retired Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipmispredict" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_to_unknown_branch_ratio" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.LD_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_%_load_buffer_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.RSV / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_%_mem_rsv_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.ST_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_%_store_buffer_stall_cycles" }, { "BriefDescription": "Cycles Per Instruction", - "MetricExpr": "tma_info_core_clks / INST_RETIRED.ANY", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / INST_RETIRED.ANY", "MetricName": "tma_info_core_cpi" }, { "BriefDescription": "Instructions Per Cycle", - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.CORE", "MetricName": "tma_info_core_ipc" }, - { - "BriefDescription": "", - "MetricExpr": "5 * tma_info_core_clks", - "MetricName": "tma_info_core_slots" - }, { "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", "MetricName": "tma_info_core_upi" }, { - "BriefDescription": "Percent of instruction miss cost that hit in DRAM", + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l2hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3", "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH", - "MetricName": "tma_info_frontend_inst_miss_cost_dramhit_percent" + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3miss" }, { - "BriefDescription": "Percent of instruction miss cost that hit in the L2", - "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH", - "MetricName": "tma_info_frontend_inst_miss_cost_l2hit_percent" + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l2hit" }, { - "BriefDescription": "Percent of instruction miss cost that hit in the L3", - "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH", - "MetricName": "tma_info_frontend_inst_miss_cost_l3hit_percent" + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3hit" }, { - "BriefDescription": "Ratio of all branches which mispredict", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_branch_mispredict_ratio" + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3miss" }, { - "BriefDescription": "Ratio between Mispredicted branches and unknown branches", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", - "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio" + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block", + "MetricExpr": "100 * LD_HEAD.L1_BOUND_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_l1_bound" }, { - "BriefDescription": "Percentage of all uops which are FPDiv uops", - "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_fpdiv_uop_ratio" + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement", + "MetricExpr": "100 * (LD_HEAD.L1_BOUND_AT_RET + MEM_BOUND_STALLS.LOAD) / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_load_bound" }, { - "BriefDescription": "Percentage of all uops which are IDiv uops", - "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_idiv_uop_ratio" + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full", + "MetricExpr": "100 * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL) * tma_mem_scheduler", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_store_bound" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_ipbranch" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory disambiguation", + "MetricExpr": "1e3 * MACHINE_CLEARS.DISAMBIGUATION / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_disamb_pki" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", - "MetricName": "tma_info_inst_mix_ipcall" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists", + "MetricExpr": "1e3 * MACHINE_CLEARS.FP_ASSIST / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_fp_assist_pki" }, { - "BriefDescription": "Instructions per Far Branch", - "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)", - "MetricName": "tma_info_inst_mix_ipfarbranch" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory ordering", + "MetricExpr": "1e3 * MACHINE_CLEARS.MEMORY_ORDERING / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_monuke_pki" }, { - "BriefDescription": "Instructions per Load", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_inst_mix_ipload" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to memory renaming", + "MetricExpr": "1e3 * MACHINE_CLEARS.MRN_NUKE / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_mrn_pki" }, { - "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", - "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)", - "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to page faults", + "MetricExpr": "1e3 * MACHINE_CLEARS.PAGE_FAULT / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_page_fault_pki" }, { - "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", - "MetricName": "tma_info_inst_mix_ipmisp_cond_taken" + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code", + "MetricExpr": "1e3 * MACHINE_CLEARS.SMC / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_smc_pki" }, { - "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", - "MetricName": "tma_info_inst_mix_ipmisp_indirect" + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_adressaliasing" }, { - "BriefDescription": "Instructions per retired return Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", - "MetricName": "tma_info_inst_mix_ipmisp_ret" + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_storefwdblk" }, { - "BriefDescription": "Instructions per retired Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", - "MetricName": "tma_info_inst_mix_ipmispredict" + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "100 * LD_HEAD.L1_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_l1miss" }, { - "BriefDescription": "Instructions per Store", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", - "MetricName": "tma_info_inst_mix_ipstore" + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "100 * LD_HEAD.OTHER_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_otherpipelineblks" }, { - "BriefDescription": "Percentage of all uops which are ucode ops", - "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_microcode_uop_ratio" + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "100 * LD_HEAD.PGWALK_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_pagewalk" }, { - "BriefDescription": "Percentage of all uops which are x87 uops", - "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", - "MetricName": "tma_info_inst_mix_x87_uop_ratio" + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "100 * LD_HEAD.DTLB_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_stlbhit" }, { - "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block", - "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_address_alias_blocks" + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "100 * LD_HEAD.ST_ADDR_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_storefwding" }, { - "BriefDescription": "Percentage of total non-speculative loads that are splits", - "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_load_splits" + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_ipload" }, { - "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", - "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", - "MetricName": "tma_info_l1_bound_store_fwd_blocks" + "BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "tma_info_mem_mix_ipstore" }, { - "BriefDescription": "Cycle cost per DRAM hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit" + "BriefDescription": "Percentage of total non-speculative loads that perform one or more locks", + "MetricExpr": "100 * MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_locks_ratio" }, { - "BriefDescription": "Cycle cost per L2 hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit" + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_splits_ratio" }, { - "BriefDescription": "Cycle cost per LLC hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", - "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit" + "BriefDescription": "Ratio of mem load uops to all uops", + "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / UOPS_RETIRED.ALL", + "MetricName": "tma_info_mem_mix_memload_ratio" }, { - "BriefDescription": "load ops retired per 1000 instruction", - "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", - "MetricName": "tma_info_memory_memloadpki" + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "100 * SERIALIZATION.C01_MS_SCB / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricName": "tma_info_serialization _%_tpause_cycles" }, { "BriefDescription": "Average CPU Utilization", @@ -428,59 +468,47 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE", "MetricGroup": "Summary", "MetricName": "tma_info_system_kernel_utilization" }, { "BriefDescription": "Average Frequency Utilization relative nominal frequency", - "MetricExpr": "tma_info_core_clks / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / CPU_CLK_UNHALTED.REF_TSC", "MetricGroup": "Power", "MetricName": "tma_info_system_turbo_utilization" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", - "MetricName": "tma_itlb_misses", - "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.15 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_fpdiv_uop_ratio" }, { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", - "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l1_bound", - "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_idiv_uop_ratio" }, { - "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l2_bound", - "MetricThreshold": "tma_l2_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of all uops which are microcode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_microcode_uop_ratio" }, { - "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_l3_bound", - "MetricThreshold": "tma_l3_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%" + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", + "MetricName": "tma_info_uop_mix_x87_uop_ratio" }, { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full", - "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_ld_buffer", - "MetricThreshold": "tma_ld_buffer > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_itlb_misses", + "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots", + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15", @@ -488,134 +516,74 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_mem_scheduler", - "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", - "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_core_clks + tma_store_bound)", - "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", - "MetricName": "tma_memory_bound", - "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.1", - "MetricgroupNoGroup": "TopdownL2", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_memory_ordering", - "MetricThreshold": "tma_memory_ordering > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", - "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots", - "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", - "MetricName": "tma_ms_uops", - "MetricThreshold": "tma_ms_uops > 0.05 & tma_retiring > 0.75", - "MetricgroupNoGroup": "TopdownL2", - "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_non_mem_scheduler", - "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that requires the use of microcode (slow nuke)", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", "MetricName": "tma_nuke", "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", - "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_other_fb", - "MetricThreshold": "tma_other_fb > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", - "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_other_l1", - "MetricThreshold": "tma_other_l1 > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.", - "MetricExpr": "max(0, tma_memory_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_other_load_store", - "MetricThreshold": "tma_other_load_store > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.", - "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", - "MetricName": "tma_other_ret", - "MetricThreshold": "tma_other_ret > 0.3 & (tma_base > 0.6 & tma_retiring > 0.75)", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_page_fault", - "MetricThreshold": "tma_page_fault > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", + "MetricThreshold": "tma_other_fb > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", - "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots", - "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / (5 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", "MetricName": "tma_predecode", - "MetricThreshold": "tma_predecode > 0.05 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "MetricThreshold": "tma_predecode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_register", - "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_reorder_buffer", - "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", + "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", - "MetricExpr": "tma_backend_bound", - "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group", + "BriefDescription": "Counts the number of cycles the core is stalled due to a resource limitation", + "MetricExpr": "tma_backend_bound - tma_core_bound", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_resource_bound", - "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2", + "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound > 0.1", "MetricgroupNoGroup": "TopdownL2", - "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.", "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of issue slots that result in retirement slots.", + "BriefDescription": "Counts the number of issue slots that result in retirement slots", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots", + "MetricExpr": "TOPDOWN_RETIRING.ALL / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.75", @@ -623,67 +591,11 @@ "ScaleUnit": "100%" }, { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative", - "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_rsv", - "MetricThreshold": "tma_rsv > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots", + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS)", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / (5 * CPU_CLK_UNHALTED.CORE)", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_serialization", - "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2)", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC.", - "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)", - "MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group", - "MetricName": "tma_smc", - "MetricThreshold": "tma_smc > 0.02 & (tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full", - "MetricExpr": "tma_store_bound", - "MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group", - "MetricName": "tma_st_buffer", - "MetricThreshold": "tma_st_buffer > 0.05 & (tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound_aux > 0.2))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", - "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_stlb_hit", - "MetricThreshold": "tma_stlb_hit > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", - "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_stlb_miss", - "MetricThreshold": "tma_stlb_miss > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", - "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", - "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", - "MetricName": "tma_store_bound", - "MetricThreshold": "tma_store_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1)", - "ScaleUnit": "100%" - }, - { - "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", - "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", - "MetricName": "tma_store_fwd_blk", - "MetricThreshold": "tma_store_fwd_blk > 0.05 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.1))", + "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", "ScaleUnit": "100%" } ] diff --git a/scripts/perf/alderlaken/cache.json b/scripts/perf/alderlaken/cache.json index 31aff48d..1500033e 100644 --- a/scripts/perf/alderlaken/cache.json +++ b/scripts/perf/alderlaken/cache.json @@ -4,7 +4,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.MISS", - "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", "UMask": "0x41" }, @@ -13,7 +13,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x2e", "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", "SampleAfterValue": "200003", "UMask": "0x4f" }, @@ -274,6 +274,16 @@ "SampleAfterValue": "1000003", "UMask": "0x5" }, + { + "BriefDescription": "Counts the number of load uops retired that performed one or more locks.", + "Counter": "0,1,2,3,4,5", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x21" + }, { "BriefDescription": "Counts the number of retired split load uops.", "Counter": "0,1,2,3,4,5", diff --git a/scripts/perf/alderlaken/metricgroups.json b/scripts/perf/alderlaken/metricgroups.json index 7b2049cd..40984c23 100644 --- a/scripts/perf/alderlaken/metricgroups.json +++ b/scripts/perf/alderlaken/metricgroups.json @@ -1,26 +1,23 @@ { + "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "TopdownL1": "Metrics for top-down breakdown at level 1", "TopdownL2": "Metrics for top-down breakdown at level 2", "TopdownL3": "Metrics for top-down breakdown at level 3", - "TopdownL4": "Metrics for top-down breakdown at level 4", + "load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", "tma_L1_group": "Metrics for top-down breakdown at level 1", "tma_L2_group": "Metrics for top-down breakdown at level 2", "tma_L3_group": "Metrics for top-down breakdown at level 3", - "tma_L4_group": "Metrics for top-down breakdown at level 4", - "tma_backend_bound_aux_group": "Metrics contributing to tma_backend_bound_aux category", "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category", "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category", - "tma_base_group": "Metrics contributing to tma_base category", - "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category", - "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category", + "tma_core_bound_group": "Metrics contributing to tma_core_bound category", "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category", - "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category", + "tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category", + "tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category", "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category", - "tma_mem_scheduler_group": "Metrics contributing to tma_mem_scheduler category", - "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category", - "tma_nuke_group": "Metrics contributing to tma_nuke category", - "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category", - "tma_retiring_group": "Metrics contributing to tma_retiring category" + "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category" } diff --git a/scripts/perf/alderlaken/other.json b/scripts/perf/alderlaken/other.json index 1998affd..54ddbe2b 100644 --- a/scripts/perf/alderlaken/other.json +++ b/scripts/perf/alderlaken/other.json @@ -48,5 +48,13 @@ "MSRValue": "0x10800", "SampleAfterValue": "100003", "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)", + "Counter": "0,1,2,3,4,5", + "EventCode": "0x75", + "EventName": "SERIALIZATION.C01_MS_SCB", + "SampleAfterValue": "200003", + "UMask": "0x4" } ] diff --git a/scripts/perf/alderlaken/pipeline.json b/scripts/perf/alderlaken/pipeline.json index 9315dca9..f05db455 100644 --- a/scripts/perf/alderlaken/pipeline.json +++ b/scripts/perf/alderlaken/pipeline.json @@ -591,6 +591,14 @@ "PEBS": "1", "SampleAfterValue": "1000003" }, + { + "BriefDescription": "Counts the number of uops issued by the front end every cycle.", + "Counter": "0,1,2,3,4,5", + "EventCode": "0x0e", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2. Uops_issued correlates to the number of ROB entries. If uop takes 2 ROB slots it counts as 2 uops_issued.", + "SampleAfterValue": "200003" + }, { "BriefDescription": "Counts the total number of uops retired.", "Counter": "0,1,2,3,4,5", diff --git a/scripts/perf/broadwell/counter.json b/scripts/perf/broadwell/counter.json index f0d685b1..1be6522e 100644 --- a/scripts/perf/broadwell/counter.json +++ b/scripts/perf/broadwell/counter.json @@ -15,7 +15,7 @@ "CountersNumGeneric": "2" }, { - "Unit": "CLOCK", + "Unit": "cbox_0", "CountersNumFixed": 1, "CountersNumGeneric": "0" } diff --git a/scripts/perf/broadwell/uncore-cache.json b/scripts/perf/broadwell/uncore-cache.json index 773285e9..c4c57feb 100644 --- a/scripts/perf/broadwell/uncore-cache.json +++ b/scripts/perf/broadwell/uncore-cache.json @@ -144,5 +144,14 @@ "PerPkg": "1", "UMask": "0x41", "Unit": "CBOX" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "Counter": "FIXED", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Unit": "cbox_0" } ] diff --git a/scripts/perf/grandridge/grr-metrics.json b/scripts/perf/grandridge/grr-metrics.json new file mode 100644 index 00000000..07e54229 --- /dev/null +++ b/scripts/perf/grandridge/grr-metrics.json @@ -0,0 +1,849 @@ +[ + { + "BriefDescription": "C1 residency percent per core", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per module", + "MetricExpr": "cstate_module@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Module_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY", + "MetricName": "cpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "CPU operating frequency (in GHz)", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9", + "MetricName": "cpu_operating_frequency", + "ScaleUnit": "1GHz" + }, + { + "BriefDescription": "Percentage of time spent in the active CPU power state C0", + "MetricExpr": "tma_info_system_cpu_utilization", + "MetricName": "cpu_utilization", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions", + "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_2mb_large_page_load_mpi", + "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions", + "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_load_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions", + "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_store_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", + "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", + "MetricName": "itlb_2nd_level_large_page_mpi", + "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions", + "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "itlb_2nd_level_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions", + "MetricExpr": "ICACHE.MISSES / INST_RETIRED.ANY", + "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L1_HIT / INST_RETIRED.ANY", + "MetricName": "l1d_demand_data_read_hits_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_HIT / INST_RETIRED.ANY", + "MetricName": "l2_demand_data_read_hits_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricName": "l2_demand_data_read_mpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions", + "MetricExpr": "LONGEST_LAT_CACHE.REFERENCE / INST_RETIRED.ANY", + "MetricName": "l2_mpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_CRD + UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF) / INST_RETIRED.ANY", + "MetricName": "llc_code_read_mpi_demand_plus_prefetch", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF + UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA) / INST_RETIRED.ANY", + "MetricName": "llc_data_read_mpi_demand_plus_prefetch", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nano seconds", + "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT) * #num_packages)) * duration_time", + "MetricName": "llc_demand_data_read_miss_latency", + "ScaleUnit": "1ns" + }, + { + "BriefDescription": "Load operations retired per instruction", + "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricName": "loads_retired_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "DDR memory read bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "DDR memory bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD + UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_total", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "DDR memory write bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", + "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", + "MetricGroup": "smi", + "MetricName": "smi_cycles", + "MetricThreshold": "smi_cycles > 0.1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Number of SMI interrupts.", + "MetricExpr": "msr@smi@", + "MetricGroup": "smi", + "MetricName": "smi_num", + "ScaleUnit": "1SMI#" + }, + { + "BriefDescription": "Store operations retired per instruction", + "MetricExpr": "MEM_UOPS_RETIRED.ALL_STORES / INST_RETIRED.ANY", + "MetricName": "stores_retired_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions", + "MetricExpr": "tma_core_bound", + "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_allocation_restriction", + "MetricThreshold": "tma_allocation_restriction > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_backend_bound", + "MetricThreshold": "tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL1", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_bad_speculation", + "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_branch_detect", + "MetricThreshold": "tma_branch_detect > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", + "MetricName": "tma_branch_mispredicts", + "MetricThreshold": "tma_branch_mispredicts > 0.05 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_branch_resteer", + "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_cisc", + "MetricThreshold": "tma_cisc > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are bounded by core restrictions and not attributed to an outstanding load or stores, or resource limitation", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_decode", + "MetricThreshold": "tma_decode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that does not require the use of microcode, classified as a fast nuke, due to memory ordering, memory disambiguation and memory renaming", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_frontend_bound", + "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_icache_misses", + "MetricThreshold": "tma_icache_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", + "MetricName": "tma_ifetch_bandwidth", + "MetricThreshold": "tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", + "MetricName": "tma_ifetch_latency", + "MetricThreshold": "tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation", + "MetricExpr": "INST_RETIRED.ANY / FP_FLOPS_RETIRED.ALL", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipflop" + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction", + "MetricExpr": "INST_RETIRED.ANY / (FP_INST_RETIRED.128B_DP + FP_INST_RETIRED.128B_SP)", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_avx128" + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction", + "MetricExpr": "INST_RETIRED.ANY / FP_INST_RETIRED.64B_DP", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_dp" + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction", + "MetricExpr": "INST_RETIRED.ANY / FP_INST_RETIRED.32B_SP", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_sp" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "tma_info_bottleneck_dtlb_miss_bound_cycles", + "MetricName": "tma_info_bottleneck_%_dtlb_miss_bound_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "tma_info_bottleneck_ifetch_miss_bound_cycles", + "MetricGroup": "Ifetch", + "MetricName": "tma_info_bottleneck_%_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "tma_info_bottleneck_load_miss_bound_cycles", + "MetricGroup": "Load_Store_Miss", + "MetricName": "tma_info_bottleneck_%_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "tma_info_bottleneck_mem_exec_bound_cycles", + "MetricGroup": "Mem_Exec", + "MetricName": "tma_info_bottleneck_%_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "100 * (LD_HEAD.DTLB_MISS_AT_RET + LD_HEAD.PGWALK_AT_RET) / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles", + "MetricName": "tma_info_bottleneck_dtlb_miss_bound_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Ifetch", + "MetricName": "tma_info_bottleneck_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Load_Store_Miss", + "MetricName": "tma_info_bottleneck_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "100 * LD_HEAD.ANY_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Mem_Exec", + "MetricName": "tma_info_bottleneck_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipbranch" + }, + { + "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "MetricName": "tma_info_br_inst_mix_ipcall" + }, + { + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", + "MetricName": "tma_info_br_inst_mix_ipfarbranch" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", + "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_ntaken" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_taken" + }, + { + "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricName": "tma_info_br_inst_mix_ipmisp_indirect" + }, + { + "BriefDescription": "Instructions per retired return Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", + "MetricName": "tma_info_br_inst_mix_ipmisp_ret" + }, + { + "BriefDescription": "Instructions per retired Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipmispredict" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_to_unknown_branch_ratio" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "tma_info_buffer_stalls_load_buffer_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_load_buffer_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "tma_info_buffer_stalls_mem_rsv_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_mem_rsv_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "tma_info_buffer_stalls_store_buffer_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_store_buffer_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.LD_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_load_buffer_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.RSV / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_mem_rsv_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.ST_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_store_buffer_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / INST_RETIRED.ANY", + "MetricName": "tma_info_core_cpi" + }, + { + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricExpr": "FP_FLOPS_RETIRED.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Flops", + "MetricName": "tma_info_core_flopc" + }, + { + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_core_ipc" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "TOPDOWN_RETIRING.ALL_P / INST_RETIRED.ANY", + "MetricName": "tma_info_core_upi" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l2hit", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l2hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l3hit", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.LLC_MISS / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3miss" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.L2_HIT / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l2hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.LLC_HIT / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l3hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "tma_info_load_miss_bound_loadmissbound_with_l2hit", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l2hit" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "tma_info_load_miss_bound_loadmissbound_with_l3hit", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3hit" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.LLC_MISS / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3miss" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.L2_HIT / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_loadmissbound_with_l2hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.LLC_HIT / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_loadmissbound_with_l3hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block", + "MetricExpr": "100 * LD_HEAD.L1_BOUND_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_l1_bound" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement", + "MetricExpr": "100 * (LD_HEAD.L1_BOUND_AT_RET + MEM_BOUND_STALLS_LOAD.ALL) / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_load_bound" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full", + "MetricExpr": "100 * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL) * tma_mem_scheduler", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_store_bound" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists", + "MetricExpr": "1e3 * MACHINE_CLEARS.FP_ASSIST / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_fp_assist_pki" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to page faults", + "MetricExpr": "1e3 * MACHINE_CLEARS.PAGE_FAULT / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_page_fault_pki" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code", + "MetricExpr": "1e3 * MACHINE_CLEARS.SMC / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_smc_pki" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "tma_info_mem_exec_blocks_loads_with_adressaliasing", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_adressaliasing" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "tma_info_mem_exec_blocks_loads_with_storefwdblk", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_storefwdblk" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.ADDRESS_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_loads_with_adressaliasing", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_loads_with_storefwdblk", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_l1miss", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_l1miss" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_otherpipelineblks", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_otherpipelineblks" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_pagewalk", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_pagewalk" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_stlbhit", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_stlbhit" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_storefwding", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_storefwding" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "100 * LD_HEAD.L1_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_l1miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "100 * LD_HEAD.OTHER_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_otherpipelineblks", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "100 * LD_HEAD.PGWALK_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_pagewalk", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "100 * LD_HEAD.DTLB_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_stlbhit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "100 * LD_HEAD.ST_ADDR_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_storefwding", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_ipload" + }, + { + "BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "tma_info_mem_mix_ipstore" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that perform one or more locks", + "MetricExpr": "100 * MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_locks_ratio" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_splits_ratio" + }, + { + "BriefDescription": "Ratio of mem load uops to all uops", + "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_mem_mix_memload_ratio" + }, + { + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "tma_info_serialization_tpause_cycles", + "MetricName": "tma_info_serialization _%_tpause_cycles" + }, + { + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "100 * SERIALIZATION.C01_MS_SCB / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricName": "tma_info_serialization_tpause_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricName": "tma_info_system_cpu_utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "FP_FLOPS_RETIRED.ALL / (duration_time * 1e9)", + "MetricGroup": "Flops", + "MetricName": "tma_info_system_gflops", + "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Summary", + "MetricName": "tma_info_system_kernel_utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "tma_info_system_turbo_utilization" + }, + { + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_fpdiv_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_idiv_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are microcode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_microcode_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_x87_uop_ratio" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB_MISS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_itlb_misses", + "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_mem_scheduler", + "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that requires the use of microcode (slow nuke)", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_nuke", + "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_other_fb", + "MetricThreshold": "tma_other_fb > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_predecode", + "MetricThreshold": "tma_predecode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_register", + "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a resource limitation", + "MetricExpr": "tma_backend_bound - tma_core_bound", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_resource_bound", + "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that result in retirement slots", + "MetricExpr": "TOPDOWN_RETIRING.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_retiring", + "MetricThreshold": "tma_retiring > 0.75", + "MetricgroupNoGroup": "TopdownL1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS)", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_serialization", + "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Uncore operating frequency in GHz", + "MetricExpr": "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time", + "MetricName": "uncore_frequency", + "ScaleUnit": "1GHz" + } +] diff --git a/scripts/perf/grandridge/metricgroups.json b/scripts/perf/grandridge/metricgroups.json new file mode 100644 index 00000000..40984c23 --- /dev/null +++ b/scripts/perf/grandridge/metricgroups.json @@ -0,0 +1,23 @@ +{ + "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TopdownL1": "Metrics for top-down breakdown at level 1", + "TopdownL2": "Metrics for top-down breakdown at level 2", + "TopdownL3": "Metrics for top-down breakdown at level 3", + "load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "tma_L1_group": "Metrics for top-down breakdown at level 1", + "tma_L2_group": "Metrics for top-down breakdown at level 2", + "tma_L3_group": "Metrics for top-down breakdown at level 3", + "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category", + "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category", + "tma_core_bound_group": "Metrics contributing to tma_core_bound category", + "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category", + "tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category", + "tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category", + "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category", + "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category" +} diff --git a/scripts/perf/haswell/counter.json b/scripts/perf/haswell/counter.json index f0d685b1..1be6522e 100644 --- a/scripts/perf/haswell/counter.json +++ b/scripts/perf/haswell/counter.json @@ -15,7 +15,7 @@ "CountersNumGeneric": "2" }, { - "Unit": "CLOCK", + "Unit": "cbox_0", "CountersNumFixed": 1, "CountersNumGeneric": "0" } diff --git a/scripts/perf/haswell/uncore-cache.json b/scripts/perf/haswell/uncore-cache.json index 8379dae9..fb116637 100644 --- a/scripts/perf/haswell/uncore-cache.json +++ b/scripts/perf/haswell/uncore-cache.json @@ -223,5 +223,13 @@ "PerPkg": "1", "UMask": "0x41", "Unit": "CBOX" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "cbox_0" } ] diff --git a/scripts/perf/mapfile.csv b/scripts/perf/mapfile.csv index 5d7fe9a3..af5318e9 100644 --- a/scripts/perf/mapfile.csv +++ b/scripts/perf/mapfile.csv @@ -1,5 +1,5 @@ -GenuineIntel-6-(97|9A|B7|BA|BF),v1.26,alderlake,core -GenuineIntel-6-BE,v1.26,alderlaken,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.27,alderlake,core +GenuineIntel-6-BE,v1.27,alderlaken,core GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core GenuineIntel-6-(3D|47),v29,broadwell,core GenuineIntel-6-56,v11,broadwellde,core diff --git a/scripts/perf/sierraforest/metricgroups.json b/scripts/perf/sierraforest/metricgroups.json new file mode 100644 index 00000000..40984c23 --- /dev/null +++ b/scripts/perf/sierraforest/metricgroups.json @@ -0,0 +1,23 @@ +{ + "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Ifetch": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Load_Store_Miss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Mem_Exec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "TopdownL1": "Metrics for top-down breakdown at level 1", + "TopdownL2": "Metrics for top-down breakdown at level 2", + "TopdownL3": "Metrics for top-down breakdown at level 3", + "load_store_bound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet", + "tma_L1_group": "Metrics for top-down breakdown at level 1", + "tma_L2_group": "Metrics for top-down breakdown at level 2", + "tma_L3_group": "Metrics for top-down breakdown at level 3", + "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category", + "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category", + "tma_core_bound_group": "Metrics contributing to tma_core_bound category", + "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category", + "tma_ifetch_bandwidth_group": "Metrics contributing to tma_ifetch_bandwidth category", + "tma_ifetch_latency_group": "Metrics contributing to tma_ifetch_latency category", + "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category", + "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category" +} diff --git a/scripts/perf/sierraforest/srf-metrics.json b/scripts/perf/sierraforest/srf-metrics.json new file mode 100644 index 00000000..b881b195 --- /dev/null +++ b/scripts/perf/sierraforest/srf-metrics.json @@ -0,0 +1,927 @@ +[ + { + "BriefDescription": "C1 residency percent per core", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per module", + "MetricExpr": "cstate_module@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Module_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY", + "MetricName": "cpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "CPU operating frequency (in GHz)", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9", + "MetricName": "cpu_operating_frequency", + "ScaleUnit": "1GHz" + }, + { + "BriefDescription": "Percentage of time spent in the active CPU power state C0", + "MetricExpr": "tma_info_system_cpu_utilization", + "MetricName": "cpu_utilization", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions", + "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_2mb_large_page_load_mpi", + "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions", + "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_load_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions", + "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "dtlb_2nd_level_store_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Bandwidth observed by the integrated I/O traffic contoller (IIO) of IO reads that are initiated by end device controllers that are requesting memory from the CPU.", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.ALL_PARTS * 4 / 1e6 / duration_time", + "MetricName": "iio_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth observed by the integrated I/O traffic controller (IIO) of IO writes that are initiated by end device controllers that are writing memory to the CPU.", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.ALL_PARTS * 4 / 1e6 / duration_time", + "MetricName": "iio_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket.", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_local", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from a remote CPU socket.", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_remote", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_local", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to a remote CPU socket.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_REMOTE + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_REMOTE) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_remote", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", + "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", + "MetricName": "itlb_2nd_level_large_page_mpi", + "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions", + "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", + "MetricName": "itlb_2nd_level_mpi", + "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions", + "MetricExpr": "ICACHE.MISSES / INST_RETIRED.ANY", + "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L1_HIT / INST_RETIRED.ANY", + "MetricName": "l1d_demand_data_read_hits_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_HIT / INST_RETIRED.ANY", + "MetricName": "l2_demand_data_read_hits_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions", + "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "MetricName": "l2_demand_data_read_mpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions", + "MetricExpr": "LONGEST_LAT_CACHE.REFERENCE / INST_RETIRED.ANY", + "MetricName": "l2_mpi", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_CRD + UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF) / INST_RETIRED.ANY", + "MetricName": "llc_code_read_mpi_demand_plus_prefetch", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF + UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA) / INST_RETIRED.ANY", + "MetricName": "llc_data_read_mpi_demand_plus_prefetch", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nano seconds", + "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT) * #num_packages)) * duration_time", + "MetricName": "llc_demand_data_read_miss_latency", + "ScaleUnit": "1ns" + }, + { + "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to local memory.", + "MetricExpr": "UNC_CHA_REQUESTS.READS_LOCAL * 64 / 1e6 / duration_time", + "MetricName": "llc_miss_local_memory_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to local memory.", + "MetricExpr": "UNC_CHA_REQUESTS.WRITES_LOCAL * 64 / 1e6 / duration_time", + "MetricName": "llc_miss_local_memory_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to remote memory.", + "MetricExpr": "UNC_CHA_REQUESTS.READS_REMOTE * 64 / 1e6 / duration_time", + "MetricName": "llc_miss_remote_memory_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.", + "MetricExpr": "UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1e6 / duration_time", + "MetricName": "llc_miss_remote_memory_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Load operations retired per instruction", + "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricName": "loads_retired_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "DDR memory read bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_read", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "DDR memory bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD + UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_total", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "DDR memory write bandwidth (MB/sec)", + "MetricExpr": "(UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) * 64 / 1e6 / duration_time", + "MetricName": "memory_bandwidth_write", + "ScaleUnit": "1MB/s" + }, + { + "BriefDescription": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_LOCAL) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_REMOTE)", + "MetricName": "numa_reads_addressed_to_local_dram", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_REMOTE) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_REMOTE)", + "MetricName": "numa_reads_addressed_to_remote_dram", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of cycles spent in System Management Interrupts.", + "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)", + "MetricGroup": "smi", + "MetricName": "smi_cycles", + "MetricThreshold": "smi_cycles > 0.1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Number of SMI interrupts.", + "MetricExpr": "msr@smi@", + "MetricGroup": "smi", + "MetricName": "smi_num", + "ScaleUnit": "1SMI#" + }, + { + "BriefDescription": "Store operations retired per instruction", + "MetricExpr": "MEM_UOPS_RETIRED.ALL_STORES / INST_RETIRED.ANY", + "MetricName": "stores_retired_per_instr", + "ScaleUnit": "1per_instr" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions", + "MetricExpr": "tma_core_bound", + "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", + "MetricName": "tma_allocation_restriction", + "MetricThreshold": "tma_allocation_restriction > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_backend_bound", + "MetricThreshold": "tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL1", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_bad_speculation", + "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_branch_detect", + "MetricThreshold": "tma_branch_detect > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", + "MetricName": "tma_branch_mispredicts", + "MetricThreshold": "tma_branch_mispredicts > 0.05 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_branch_resteer", + "MetricThreshold": "tma_branch_resteer > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_cisc", + "MetricThreshold": "tma_cisc > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are bounded by core restrictions and not attributed to an outstanding load or stores, or resource limitation", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_decode", + "MetricThreshold": "tma_decode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that does not require the use of microcode, classified as a fast nuke, due to memory ordering, memory disambiguation and memory renaming", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "MetricThreshold": "tma_fast_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_frontend_bound", + "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_icache_misses", + "MetricThreshold": "tma_icache_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", + "MetricName": "tma_ifetch_bandwidth", + "MetricThreshold": "tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", + "MetricName": "tma_ifetch_latency", + "MetricThreshold": "tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation", + "MetricExpr": "INST_RETIRED.ANY / FP_FLOPS_RETIRED.ALL", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipflop" + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction", + "MetricExpr": "INST_RETIRED.ANY / (FP_INST_RETIRED.128B_DP + FP_INST_RETIRED.128B_SP)", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_avx128" + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction", + "MetricExpr": "INST_RETIRED.ANY / FP_INST_RETIRED.64B_DP", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_dp" + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction", + "MetricExpr": "INST_RETIRED.ANY / FP_INST_RETIRED.32B_SP", + "MetricGroup": "Flops", + "MetricName": "tma_info_arith_inst_mix_ipfparith_scalar_sp" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "tma_info_bottleneck_dtlb_miss_bound_cycles", + "MetricName": "tma_info_bottleneck_%_dtlb_miss_bound_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "tma_info_bottleneck_ifetch_miss_bound_cycles", + "MetricGroup": "Ifetch", + "MetricName": "tma_info_bottleneck_%_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "tma_info_bottleneck_load_miss_bound_cycles", + "MetricGroup": "Load_Store_Miss", + "MetricName": "tma_info_bottleneck_%_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "tma_info_bottleneck_mem_exec_bound_cycles", + "MetricGroup": "Mem_Exec", + "MetricName": "tma_info_bottleneck_%_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to a first level data TLB miss", + "MetricExpr": "100 * (LD_HEAD.DTLB_MISS_AT_RET + LD_HEAD.PGWALK_AT_RET) / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles", + "MetricName": "tma_info_bottleneck_dtlb_miss_bound_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Ifetch", + "MetricName": "tma_info_bottleneck_ifetch_miss_bound_cycles", + "PublicDescription": "Percentage of time that allocation and retirement is stalled by the Frontend Cluster due to an Ifetch Miss, either Icache or ITLB Miss. See Info.Ifetch_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled due to an L1 miss", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Load_Store_Miss", + "MetricName": "tma_info_bottleneck_load_miss_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled due to an L1 miss. See Info.Load_Miss_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall", + "MetricExpr": "100 * LD_HEAD.ANY_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Cycles;Mem_Exec", + "MetricName": "tma_info_bottleneck_mem_exec_bound_cycles", + "PublicDescription": "Percentage of time that retirement is stalled by the Memory Cluster due to a pipeline stall. See Info.Mem_Exec_Bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipbranch" + }, + { + "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "MetricName": "tma_info_br_inst_mix_ipcall" + }, + { + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u", + "MetricName": "tma_info_br_inst_mix_ipfarbranch" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", + "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_ntaken" + }, + { + "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricName": "tma_info_br_inst_mix_ipmisp_cond_taken" + }, + { + "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricName": "tma_info_br_inst_mix_ipmisp_indirect" + }, + { + "BriefDescription": "Instructions per retired return Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", + "MetricName": "tma_info_br_inst_mix_ipmisp_ret" + }, + { + "BriefDescription": "Instructions per retired Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_inst_mix_ipmispredict" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "tma_info_br_mispredict_bound_branch_mispredict_to_unknown_branch_ratio" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "tma_info_buffer_stalls_load_buffer_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_load_buffer_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "tma_info_buffer_stalls_mem_rsv_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_mem_rsv_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "tma_info_buffer_stalls_store_buffer_stall_cycles", + "MetricName": "tma_info_buffer_stalls_%_store_buffer_stall_cycles" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to load buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.LD_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_load_buffer_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to memory reservation stations full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.RSV / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_mem_rsv_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of time that allocation is stalled due to store buffer full", + "MetricExpr": "100 * MEM_SCHEDULER_BLOCK.ST_BUF / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_buffer_stalls_store_buffer_stall_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / INST_RETIRED.ANY", + "MetricName": "tma_info_core_cpi" + }, + { + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricExpr": "FP_FLOPS_RETIRED.ALL / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Flops", + "MetricName": "tma_info_core_flopc" + }, + { + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.CORE", + "MetricName": "tma_info_core_ipc" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "TOPDOWN_RETIRING.ALL_P / INST_RETIRED.ANY", + "MetricName": "tma_info_core_upi" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l2hit", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l2hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l3hit", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3hit" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss subsequently misses in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.LLC_MISS / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_%_ifetchmissbound_with_l3miss" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.L2_HIT / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l2hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of ifetch miss bound stalls, where the ifetch miss hits in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_IFETCH.LLC_HIT / MEM_BOUND_STALLS_IFETCH.ALL", + "MetricName": "tma_info_ifetch_miss_bound_ifetchmissbound_with_l3hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "tma_info_load_miss_bound_loadmissbound_with_l2hit", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l2hit" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "tma_info_load_miss_bound_loadmissbound_with_l3hit", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3hit" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that subsequently misses the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.LLC_MISS / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_%_loadmissbound_with_l3miss" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.L2_HIT / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_loadmissbound_with_l2hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of memory bound stalls where retirement is stalled due to an L1 miss that hit the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS_LOAD.LLC_HIT / MEM_BOUND_STALLS_LOAD.ALL", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_miss_bound_loadmissbound_with_l3hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a pipeline block", + "MetricExpr": "100 * LD_HEAD.L1_BOUND_AT_RET / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_l1_bound" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement", + "MetricExpr": "100 * (LD_HEAD.L1_BOUND_AT_RET + MEM_BOUND_STALLS_LOAD.ALL) / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_load_bound" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full", + "MetricExpr": "100 * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL) * tma_mem_scheduler", + "MetricGroup": "load_store_bound", + "MetricName": "tma_info_load_store_bound_store_bound" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to floating point assists", + "MetricExpr": "1e3 * MACHINE_CLEARS.FP_ASSIST / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_fp_assist_pki" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to page faults", + "MetricExpr": "1e3 * MACHINE_CLEARS.PAGE_FAULT / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_page_fault_pki" + }, + { + "BriefDescription": "Counts the number of machine clears relative to thousands of instructions retired, due to self-modifying code", + "MetricExpr": "1e3 * MACHINE_CLEARS.SMC / INST_RETIRED.ANY", + "MetricName": "tma_info_machine_clear_bound_machine_clears_smc_pki" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "tma_info_mem_exec_blocks_loads_with_adressaliasing", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_adressaliasing" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "tma_info_mem_exec_blocks_loads_with_storefwdblk", + "MetricName": "tma_info_mem_exec_blocks_%_loads_with_storefwdblk" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with an address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.ADDRESS_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_loads_with_adressaliasing", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_exec_blocks_loads_with_storefwdblk", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_l1miss", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_l1miss" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_otherpipelineblks", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_otherpipelineblks" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_pagewalk", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_pagewalk" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_stlbhit", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_stlbhit" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "tma_info_mem_exec_bound_loadhead_with_storefwding", + "MetricName": "tma_info_mem_exec_bound_%_loadhead_with_storefwding" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a first level data cache miss", + "MetricExpr": "100 * LD_HEAD.L1_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_l1miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to other block cases, such as pipeline conflicts, fences, etc", + "MetricExpr": "100 * LD_HEAD.OTHER_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_otherpipelineblks", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a pagewalk", + "MetricExpr": "100 * LD_HEAD.PGWALK_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_pagewalk", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a second level TLB miss", + "MetricExpr": "100 * LD_HEAD.DTLB_MISS_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_stlbhit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Percentage of Memory Execution Bound due to a store forward address match", + "MetricExpr": "100 * LD_HEAD.ST_ADDR_AT_RET / LD_HEAD.ANY_AT_RET", + "MetricName": "tma_info_mem_exec_bound_loadhead_with_storefwding", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_ipload" + }, + { + "BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "tma_info_mem_mix_ipstore" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that perform one or more locks", + "MetricExpr": "100 * MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_locks_ratio" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "tma_info_mem_mix_load_splits_ratio" + }, + { + "BriefDescription": "Ratio of mem load uops to all uops", + "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_mem_mix_memload_ratio" + }, + { + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "tma_info_serialization_tpause_cycles", + "MetricName": "tma_info_serialization _%_tpause_cycles" + }, + { + "BriefDescription": "Percentage of time that the core is stalled due to a TPAUSE or UMWAIT instruction", + "MetricExpr": "100 * SERIALIZATION.C01_MS_SCB / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricName": "tma_info_serialization_tpause_cycles", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricName": "tma_info_system_cpu_utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "FP_FLOPS_RETIRED.ALL / (duration_time * 1e9)", + "MetricGroup": "Flops", + "MetricName": "tma_info_system_gflops", + "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE", + "MetricGroup": "Summary", + "MetricName": "tma_info_system_kernel_utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.CORE / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "tma_info_system_turbo_utilization" + }, + { + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_fpdiv_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_idiv_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are microcode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_microcode_uop_ratio" + }, + { + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / TOPDOWN_RETIRING.ALL_P", + "MetricName": "tma_info_uop_mix_x87_uop_ratio" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB_MISS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_latency_group", + "MetricName": "tma_itlb_misses", + "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_ifetch_latency > 0.15 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "MetricThreshold": "tma_machine_clears > 0.05 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_mem_scheduler", + "MetricThreshold": "tma_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "MetricThreshold": "tma_non_mem_scheduler > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear that requires the use of microcode (slow nuke)", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", + "MetricName": "tma_nuke", + "MetricThreshold": "tma_nuke > 0.05 & (tma_machine_clears > 0.05 & tma_bad_speculation > 0.15)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_other_fb", + "MetricThreshold": "tma_other_fb > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_ifetch_bandwidth_group", + "MetricName": "tma_predecode", + "MetricThreshold": "tma_predecode > 0.05 & (tma_ifetch_bandwidth > 0.1 & tma_frontend_bound > 0.2)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_register", + "MetricThreshold": "tma_register > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls)", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "MetricThreshold": "tma_reorder_buffer > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a resource limitation", + "MetricExpr": "tma_backend_bound - tma_core_bound", + "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", + "MetricName": "tma_resource_bound", + "MetricThreshold": "tma_resource_bound > 0.2 & tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that result in retirement slots", + "MetricExpr": "TOPDOWN_RETIRING.ALL_P / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL1;tma_L1_group", + "MetricName": "tma_retiring", + "MetricThreshold": "tma_retiring > 0.75", + "MetricgroupNoGroup": "TopdownL1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS)", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / (6 * CPU_CLK_UNHALTED.CORE)", + "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", + "MetricName": "tma_serialization", + "MetricThreshold": "tma_serialization > 0.1 & (tma_resource_bound > 0.2 & tma_backend_bound > 0.1)", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Uncore operating frequency in GHz", + "MetricExpr": "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time", + "MetricName": "uncore_frequency", + "ScaleUnit": "1GHz" + }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_transmit_bw", + "ScaleUnit": "1MB/s" + } +] diff --git a/scripts/perf/skylake/counter.json b/scripts/perf/skylake/counter.json index f0d685b1..1be6522e 100644 --- a/scripts/perf/skylake/counter.json +++ b/scripts/perf/skylake/counter.json @@ -15,7 +15,7 @@ "CountersNumGeneric": "2" }, { - "Unit": "CLOCK", + "Unit": "cbox_0", "CountersNumFixed": 1, "CountersNumGeneric": "0" } diff --git a/scripts/perf/skylake/uncore-cache.json b/scripts/perf/skylake/uncore-cache.json index 3c55288f..46ba98ab 100644 --- a/scripts/perf/skylake/uncore-cache.json +++ b/scripts/perf/skylake/uncore-cache.json @@ -134,5 +134,14 @@ "PerPkg": "1", "UMask": "0x41", "Unit": "CBOX" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "Counter": "FIXED", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Unit": "cbox_0" } ]