Skip to content

Commit

Permalink
add python stats package
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Sep 1, 2023
1 parent 3d51cdd commit 9fbed9d
Show file tree
Hide file tree
Showing 10 changed files with 486 additions and 263 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ rustup component add llvm-tools-preview
cargo install grcov

# collect code coverage in tests (todo)
cargo xtasks coverage
cargo xtask coverage

cargo xtask accelsim convert-config -c ./accelsim/gtx1080/gpgpusim.config -c ./accelsim/gtx1080/gpgpusim.trace.config
```

Publishing traces (used by CI)
Expand Down
248 changes: 248 additions & 0 deletions accelsim/gtx1080/gpgpusim.config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
shader_core:
gpgpu_simd_model: 1
gpgpu_shader_core_pipeline: 2048:32
gpgpu_tex_cache_l1: N:16:128:24,L:R:m:N:L,F:128:4,128:2
gpgpu_const_cache_l1: N:128:64:2,L:R:f:N:L,A:2:64,4
gpgpu_cache_il1: N:8:128:4,L:R:f:N:L,A:2:48,4
gpgpu_cache_dl1: N:64:128:6,L:L:m:N:H,A:128:8,8
gpgpu_l1_cache_write_ratio: 0
gpgpu_l1_banks: 1
gpgpu_l1_banks_byte_interleaving: 32
gpgpu_l1_banks_hashing_function: 0
gpgpu_l1_latency: 1
gpgpu_smem_latency: 3
gpgpu_cache_dl1_pref_l1: none
gpgpu_cache_dl1_pref_shared: none
gpgpu_gmem_skip_l1d: true
gpgpu_perfect_mem: false
n_regfile_gating_group: 4
gpgpu_clock_gated_reg_file: false
gpgpu_clock_gated_lanes: false
gpgpu_shader_registers: 65536
gpgpu_registers_per_block: 8192
gpgpu_ignore_resources_limitation: false
gpgpu_shader_cta: 32
gpgpu_num_cta_barriers: 16
gpgpu_n_clusters: 20
gpgpu_n_cores_per_cluster: 1
gpgpu_n_cluster_ejection_buffer_size: 8
gpgpu_n_ldst_response_buffer_size: 2
gpgpu_shmem_per_block: 49152
gpgpu_shmem_size: 98304
gpgpu_shmem_option: 0
gpgpu_unified_l1d_size: 0
gpgpu_adaptive_cache_config: false
gpgpu_shmem_size_default: 16384
gpgpu_shmem_size_pref_l1: 16384
gpgpu_shmem_size_pref_shared: 16384
gpgpu_shmem_num_banks: 32
gpgpu_shmem_limited_broadcast: 0
gpgpu_shmem_warp_parts: 1
gpgpu_mem_unit_ports: 1
gpgpu_warpdistro_shader: -1
gpgpu_warp_issue_shader: 0
gpgpu_local_mem_map: true
gpgpu_num_reg_banks: 32
gpgpu_reg_bank_use_warp_id: false
gpgpu_sub_core_model: false
gpgpu_enable_specialized_operand_collector: true
gpgpu_operand_collector_num_units_sp: 20
gpgpu_operand_collector_num_units_dp: 0
gpgpu_operand_collector_num_units_sfu: 4
gpgpu_operand_collector_num_units_int: 0
gpgpu_operand_collector_num_units_tensor_core: 4
gpgpu_operand_collector_num_units_mem: 8
gpgpu_operand_collector_num_units_gen: 0
gpgpu_operand_collector_num_in_ports_sp: 4
gpgpu_operand_collector_num_in_ports_dp: 0
gpgpu_operand_collector_num_in_ports_sfu: 1
gpgpu_operand_collector_num_in_ports_int: 0
gpgpu_operand_collector_num_in_ports_tensor_core: 1
gpgpu_operand_collector_num_in_ports_mem: 1
gpgpu_operand_collector_num_in_ports_gen: 0
gpgpu_operand_collector_num_out_ports_sp: 4
gpgpu_operand_collector_num_out_ports_dp: 0
gpgpu_operand_collector_num_out_ports_sfu: 1
gpgpu_operand_collector_num_out_ports_int: 0
gpgpu_operand_collector_num_out_ports_tensor_core: 1
gpgpu_operand_collector_num_out_ports_mem: 1
gpgpu_operand_collector_num_out_ports_gen: 0
gpgpu_coalesce_arch: 13
gpgpu_num_sched_per_core: 2
gpgpu_max_insn_issue_per_warp: 2
gpgpu_dual_issue_diff_exec_units: true
gpgpu_simt_core_sim_order: 1
gpgpu_pipeline_widths: 4,0,0,1,1,4,0,0,1,1,6
gpgpu_tensor_core_avail: 0
gpgpu_num_sp_units: 4
gpgpu_num_dp_units: 0
gpgpu_num_int_units: 0
gpgpu_num_sfu_units: 1
gpgpu_num_tensor_core_units: 0
gpgpu_num_mem_units: 1
gpgpu_scheduler: gto
gpgpu_concurrent_kernel_sm: false
gpgpu_perfect_inst_const_cache: false
gpgpu_inst_fetch_throughput: 1
gpgpu_reg_file_port_throughput: 1
specialized_unit_1: 0,4,4,4,4,BRA
specialized_unit_2: 0,4,4,4,4,BRA
specialized_unit_3: 0,4,4,4,4,BRA
specialized_unit_4: 0,4,4,4,4,BRA
specialized_unit_5: 0,4,4,4,4,BRA
specialized_unit_6: 0,4,4,4,4,BRA
specialized_unit_7: 0,4,4,4,4,BRA
specialized_unit_8: 0,4,4,4,4,BRA
ptx:
g_save_embedded_ptx: false
keep: false
g_ptx_save_converted_ptxplus: false
g_occupancy_sm_number: 60
opcode_latency_int: 4,13,4,5,145
opcode_latency_fp: 4,13,4,5,39
opcode_latency_dp: 8,19,8,8,330
opcode_latency_sfu: '8'
opcode_latency_tensor: '64'
opcode_initiation_int: 1,2,2,2,8
opcode_initiation_fp: 1,2,1,1,4
opcode_initiation_dp: 1,2,1,1,130
opcode_initiation_sfu: '8'
opcode_initiation_tensor: '64'
cdp_latency_str: 7200,8000,100,12000,1600
trace:
traces_filename: ./traces/kernelslist.g
trace_opcode_latency_initiation_int: 4,1
trace_opcode_latency_initiation_sp: 4,1
trace_opcode_latency_initiation_dp: 4,1
trace_opcode_latency_initiation_sfu: 4,1
trace_opcode_latency_initiation_tensor: 4,1
trace_opcode_latency_initiation_spec_op_1: 4,4
trace_opcode_latency_initiation_spec_op_2: 4,4
trace_opcode_latency_initiation_spec_op_3: 4,4
trace_opcode_latency_initiation_spec_op_4: 4,4
trace_opcode_latency_initiation_spec_op_5: 4,4
trace_opcode_latency_initiation_spec_op_6: 4,4
trace_opcode_latency_initiation_spec_op_7: 4,4
trace_opcode_latency_initiation_spec_op_8: 4,4
sim:
gpu_max_cycle_opt: 0
gpu_max_insn_opt: 0
gpu_max_cta_opt: 0
gpu_max_completed_cta_opt: 0
gpgpu_runtime_stat: '500'
liveness_message_freq: 1
gpgpu_compute_capability_major: 7
gpgpu_compute_capability_minor: 0
gpgpu_flush_l1_cache: false
gpgpu_flush_l2_cache: false
gpu_deadlock_detect: true
gpgpu_ptx_instruction_classification: 0
g_ptx_sim_mode: 0
gpgpu_clock_domains: 1607.0:1607.0:1607.0:2500.0
max_concurrent_kernel: 32
gpgpu_cflog_interval: 0
g_visualizer_enabled: true
g_visualizer_filename: null
g_visualizer_zlevel: 6
stack_size_limit: 1024
heap_size_limit: 8388608
runtime_sync_depth_limit: 2
runtime_pending_launch_count_limit: 2048
trace_enabled: false
trace_config_str: none
trace_sampling_core: 0
trace_sampling_memory_partition: -1
g_kernel_launch_latency: 0
g_cdp_enabled: false
g_tb_launch_latency: 0
dram_timing:
nbk: null
t_ccd: null
t_rrd: null
t_rdc: null
t_ras: null
t_rp: null
t_rc: null
t_cdlr: null
t_wr: null
cl: null
wl: null
nbkgrp: 1
t_ccdl: 0
t_rtpl: 0
functional:
m_ptx_use_cuobjdump: true
m_experimental_lib_support: false
checkpoint_option: 0
checkpoint_kernel: 1
checkpoint_cta: 0
resume_option: 0
resume_kernel: 0
resume_cta: 0
checkpoint_cta_t: 0
checkpoint_insn_y: 0
m_ptx_convert_to_ptxplus: false
m_ptx_force_max_capability: 60
g_ptx_inst_debug_to_file: false
g_ptx_inst_debug_file: inst_debug.txt
g_ptx_inst_debug_thread_uid: 1
interconn:
g_network_mode: 1
g_network_config_filename: config_fermi_islip.icnt
in_buffer_limit: 64
out_buffer_limit: 64
subnets: 2
arbiter_algo: 1
verbose: 0
grant_cycles: 1
memory:
perf_sim_memcpy: true
simple_dram_model: false
scheduler_type: 1
gpgpu_l2_queue_config: 8:8:8:8
l2_ideal: false
l2_config_string: N:64:128:16,L:B:m:W:L,A:1024:1024,4:0,32
l2_texure_only: false
n_mem: 8
n_sub_partition_per_memory_channel: 2
gpu_n_mem_per_ctrlr: 1
gpgpu_memlatency_stat: 14
gpgpu_frfcfs_dram_sched_queue_size: 64
gpgpu_dram_return_queue_size: 116
dram_bus_width: 4
dram_burst_length: 8
data_command_freq_ratio: 4
gpgpu_dram_timing_opt: |-
"nbk=16:CCD=2:RRD=6:RCD=12:RAS=28:RP=12:RC=40:
CL=12:WL=4:CDLR=5:WR=12:nbkgrp=1:CCDL=0:RTPL=0"
rop_latency: 120
dram_latency: 100
dual_bus_interface: 0
dram_bnk_indexing_policy: 0
dram_bnkgrp_indexing_policy: 0
seperate_write_queue_enabled: false
write_queue_size_opt: 32:28:16
elimnate_rw_turnaround: false
icnt_flit_size: 32
address_mapping:
addrdec_option: dramid@8;00000000.00000000.00000000.00000000.0000RRRR.RRRRRRRR.RBBBCCCC.BCCSSSSS
run_test: false
gpgpu_mem_address_mask: 1
memory_partition_indexing: 0
unknown:
- --enable_ptx_file_line_stats=1
- --visualizer_enabled=0
- --power_simulation_enabled=0
- --trace_opcode_latency_initiation_int=2,2
- --trace_opcode_latency_initiation_sp=2,1
- --trace_opcode_latency_initiation_dp=64,64
- --trace_opcode_latency_initiation_sfu=21,8
- --trace_opcode_latency_initiation_tensor=32,32
- --specialized_unit_1=1,4,4,4,4,BRA
- --trace_opcode_latency_initiation_spec_op_1=4,4
- --specialized_unit_2=1,4,200,4,4,TEX
- --trace_opcode_latency_initiation_spec_op_2=200,4
- --specialized_unit_3=1,4,32,4,4,TENSOR
- --trace_opcode_latency_initiation_spec_op_3=32,32
- --specialized_unit_4=1,4,4,4,4,UDP
- --trace_opcode_latency_initiation_spec_op_4=4,1
31 changes: 30 additions & 1 deletion gpucachesim/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,43 @@
import yaml
from pathlib import Path
from os import PathLike
from typing import Optional
import typing

from gpucachesim import ROOT_DIR

# Directory one level above ROOT_DIR (imported from the gpucachesim package);
# presumably the repository checkout root -- confirm against ROOT_DIR.
REPO_ROOT_DIR = ROOT_DIR.parent
# Materialized benchmark definitions under the repository's test-apps folder.
DEFAULT_BENCH_FILE = REPO_ROOT_DIR / "test-apps/test-apps-materialized.yml"


class SimConfig(typing.TypedDict):
    """Typed view of the ``sim`` section of a GPU config mapping.

    Only ``gpgpu_clock_domains`` is declared here.
    """

    # Clock domains as one colon-separated string, e.g.
    # "1607.0:1607.0:1607.0:2500.0" in the bundled GTX 1080 config.
    # Presumably <core>:<interconnect>:<L2>:<DRAM> in MHz -- TODO confirm.
    gpgpu_clock_domains: str

    # TODO: unfinished sketch of derived accessors, left disabled.
    # NOTE(review): PEP 589 does not allow methods on a TypedDict (instances
    # are plain dicts), so these would have to become free functions.
    #
    # @property
    # def core_clock_speed(self) -> int:
    #     self.gpgpu_clock_domains()
    #
    # @property
    # def num_cores(self) -> int:
    #     kk


class GPUConfig(typing.TypedDict):
    """Typed view of a GPU config mapping.

    Only the ``sim`` section is declared here.
    """

    # The ``sim`` section of the config, typed as SimConfig.
    sim: SimConfig


class ProfileConfig(typing.TypedDict):
    """Typed view of the ``profile`` entry of a benchmark config."""

    # Path-like location associated with profiling; presumably the directory
    # holding profiler output -- TODO confirm against the code that reads it.
    profile_dir: PathLike


class SimulateConfig(typing.TypedDict):
    """Typed view of the ``simulate`` entry of a benchmark config."""

    # NOTE(review): this declares the same ``profile_dir`` key as
    # ProfileConfig -- confirm it is intended and not a copy-paste of the
    # profile section.
    profile_dir: PathLike


class BenchConfig(typing.TypedDict):
    """Typed view of a single benchmark's configuration mapping.

    Presumably the shape returned by ``Benchmarks.get_bench_config`` --
    TODO confirm against that method.
    """

    # Settings for the ``profile`` entry.
    profile: ProfileConfig
    # Settings for the ``simulate`` entry.
    simulate: SimulateConfig
simulate: SimulateConfig


class Benchmarks:
def __init__(self, path: PathLike) -> None:
"""load the materialized benchmark config"""
Expand Down
20 changes: 16 additions & 4 deletions gpucachesim/stats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import click
import yaml

import gpucachesim.stats.stats as stats
import gpucachesim.stats.native as native
from gpucachesim.benchmarks import Benchmarks
from gpucachesim.benchmarks import Benchmarks, GPUConfig, REPO_ROOT_DIR


# Default value of the --config option: the GTX 1080 GPU config in YAML form.
# NOTE(review): main() reads this file with yaml.safe_load. PyYAML follows
# YAML 1.1, where unquoted colon-separated digit groups (e.g. `2048:32`,
# `8:8:8:8`, `32:28:16`) resolve to base-60 integers rather than strings --
# verify any such value before relying on it.
DEFAULT_CONFIG_FILE = REPO_ROOT_DIR / "./accelsim/gtx1080/gpgpusim.config.yml"


@click.command()
@click.option("--path", help="Path to materialized benchmark config")
@click.option("--config", default=DEFAULT_CONFIG_FILE, help="Path to GPU config")
@click.option("--bench", help="Benchmark name")
@click.option("--input", default=0, help="Input index")
def main(path, bench, input):
def main(path, config, bench, input):
from pprint import pprint

b = Benchmarks(path)
Expand All @@ -19,8 +24,15 @@ def main(path, bench, input):
bench_config = b.get_bench_config(bench, input)
# pprint(bench_config)

our_stats = stats.Stats(bench_config["simulate"])
native_stats = native.Stats(bench_config["simulate"])
with open(config, "rb") as f:
config: GPUConfig = yaml.safe_load(f)

pprint(config)
our_stats = stats.Stats(bench_config)
native_stats = native.Stats(config, bench_config)

print(native_stats.cycles())
print(our_stats.cycles())


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 9fbed9d

Please sign in to comment.