Skip to content

Commit

Permalink
python stats
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Sep 2, 2023
1 parent 293be12 commit 5c6a25a
Show file tree
Hide file tree
Showing 15 changed files with 443 additions and 238 deletions.
6 changes: 3 additions & 3 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# [target.x86_64-unknown-linux-gnu]
# linker = "/usr/bin/clang"
# rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]
[target.x86_64-unknown-linux-gnu]
linker = "/usr/bin/clang"
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]

[alias]
xtask = "run --package xtask --"
Expand Down
43 changes: 32 additions & 11 deletions gpucachesim/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,48 @@
from pathlib import Path
from os import PathLike
import typing
from typing import Dict

from gpucachesim import ROOT_DIR

REPO_ROOT_DIR = ROOT_DIR.parent
DEFAULT_BENCH_FILE = REPO_ROOT_DIR / "test-apps/test-apps-materialized.yml"


class SimConfig(typing.TypedDict):
gpgpu_clock_domains: str
# class SimConfig(typing.TypedDict):
# gpgpu_clock_domains: str

# @property
# def core_clock_speed(self) -> int:
# self.gpgpu_clock_domains()
#
# @property
# def num_cores(self) -> int:
# kk

class GPUConfig:
    """Typed accessor around a parsed GPU configuration mapping.

    The wrapped ``config`` is a nested mapping; the properties below read
    its ``sim`` and ``shader_core`` sections.
    """

    def __init__(self, config: typing.Mapping[str, typing.Any]) -> None:
        self.config = config

    @property
    def _clock_domains(self) -> Dict[str, float]:
        """Parse ``sim.gpgpu_clock_domains`` into named clock values.

        The option is a colon-separated string of the form
        ``<Core Clock>:<Interconnect Clock>:<L2 Clock>:<DRAM Clock>``.

        Raises:
            IndexError: if fewer than four fields are present.
            ValueError: if a field is not a valid number.
        """
        fields = self.config["sim"]["gpgpu_clock_domains"].split(":")
        # The raw fields are strings; convert them so that callers can do
        # arithmetic with the clock speeds (and so the annotation holds).
        return dict(
            core=float(fields[0]),
            interconnect=float(fields[1]),
            l2=float(fields[2]),
            dram=float(fields[3]),
        )

    @property
    def core_clock_speed(self) -> float:
        """Core clock value taken from the first clock-domain field."""
        return self._clock_domains["core"]

    @property
    def num_clusters(self) -> int:
        """Value of ``shader_core.gpgpu_n_clusters``."""
        return self.config["shader_core"]["gpgpu_n_clusters"]

    @property
    def cores_per_cluster(self) -> int:
        """Value of ``shader_core.gpgpu_n_cores_per_cluster``."""
        # Previously this read ``gpgpu_n_clusters`` (copy-paste of
        # ``num_clusters``), which made ``num_total_cores`` clusters squared.
        return self.config["shader_core"]["gpgpu_n_cores_per_cluster"]

    @property
    def num_total_cores(self) -> int:
        """Total core count: clusters times cores per cluster."""
        return self.num_clusters * self.cores_per_cluster


class ProfileConfig(typing.TypedDict):
Expand Down
65 changes: 53 additions & 12 deletions gpucachesim/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import gpucachesim.stats.stats as stats
import gpucachesim.stats.native as native
import gpucachesim.stats.accelsim as accelsim
from gpucachesim.benchmarks import Benchmarks, GPUConfig, REPO_ROOT_DIR


Expand All @@ -12,27 +13,67 @@
@click.command()
@click.option("--path", help="Path to materialized benchmark config")
@click.option("--config", default=DEFAULT_CONFIG_FILE, help="Path to GPU config")
@click.option("--bench", "bench_name", help="Benchmark name")
@click.option("--input", "input_idx", type=int, help="Input index")
def main(path, config, bench_name, input_idx):
    """Print native vs. accelsim instruction and cycle counts per benchmark input.

    Args:
        path: Path to the materialized benchmark config.
        config: Path to the GPU config YAML file.
        bench_name: Name of the benchmark to report on (currently required).
        input_idx: Index of a single input to report on; when omitted, all
            inputs of the benchmark are reported.

    Raises:
        NotImplementedError: if no benchmark name is given.
    """
    from pprint import pprint
    import wasabi

    benches = []

    b = Benchmarks(path)
    if bench_name is None:
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it fails with a TypeError instead of the intended error.
        raise NotImplementedError("running without --bench is not supported yet")

    # ``type=int`` on the option keeps the index an integer now that the
    # option no longer has an integer default to infer the type from.
    if input_idx is None:
        benches.extend(b[bench_name])
    else:
        benches.append(b.get_bench_config(bench_name, input_idx))

    # Use a separate name so the ``config`` path argument is not rebound
    # while the file opened from it is still in use.
    with open(config, "rb") as f:
        gpu_config = GPUConfig(yaml.safe_load(f))

    pprint(gpu_config)

    for bench_config in benches:
        name = bench_config["name"]
        input_idx = bench_config["input_idx"]
        print(f"\n\n=== {name}@{input_idx} ===")

        accelsim_stats = accelsim.Stats(gpu_config, bench_config)
        native_stats = native.Stats(gpu_config, bench_config)

        # One row per metric, one column per statistics source.
        data = [
            (
                "instructions",
                native_stats.instructions(),
                accelsim_stats.instructions(),
            ),
            ("cycles", native_stats.cycles(), accelsim_stats.cycles()),
        ]
        print(
            wasabi.table(
                data,
                header=("", "native", "accelsim"),
                divider=True,
                aligns=("r", "r", "r"),
            )
        )

if __name__ == "__main__":
Expand Down
54 changes: 52 additions & 2 deletions gpucachesim/stats/accelsim.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,52 @@
class AccelsimStats:
pass
import pandas as pd
from pathlib import Path

from gpucachesim.benchmarks import GPUConfig, BenchConfig
import gpucachesim.stats.common as common


class Stats(common.Stats):
    """Statistics read from the CSV files in a benchmark's accelsim stats directory."""

    bench_config: BenchConfig
    config: GPUConfig

    def __init__(self, config: GPUConfig, bench_config: BenchConfig) -> None:
        """Load every ``stats.*.csv`` table found under the configured stats dir.

        The directory is taken from
        ``bench_config["accelsim_simulate"]["stats_dir"]``.
        """
        stats_dir = Path(bench_config["accelsim_simulate"]["stats_dir"])
        self.path = stats_dir

        # Column layout shared by the header-less per-cache CSV files.
        cache_columns = ("cache_id", "access_type", "status", "count")

        # Files with a header row.
        self.sim_df = pd.read_csv(stats_dir / "stats.sim.csv", header=0)
        # Header-less file: column names are supplied explicitly.
        self.accesses_df = pd.read_csv(
            stats_dir / "stats.accesses.csv",
            header=None,
            names=["access", "count"],
        )
        self.dram_df = pd.read_csv(stats_dir / "stats.dram.csv", header=0)
        self.dram_banks_df = pd.read_csv(stats_dir / "stats.dram.banks.csv", header=0)
        self.instructions_df = pd.read_csv(
            stats_dir / "stats.instructions.csv",
            header=None,
            names=["memory_space", "write", "count"],
        )
        self.l1i_stats = pd.read_csv(
            stats_dir / "stats.cache.l1i.csv",
            header=None,
            names=list(cache_columns),
        )
        self.l2d_stats = pd.read_csv(
            stats_dir / "stats.cache.l2d.csv",
            header=None,
            names=list(cache_columns),
        )

        self.use_duration = False
        self.bench_config = bench_config
        self.config = config

    def cycles(self) -> int:
        """Sum of the ``cycles`` column of ``stats.sim.csv``."""
        cycle_counts = self.sim_df["cycles"]
        return cycle_counts.sum()

    def instructions(self) -> int:
        """Sum of the ``instructions`` column of ``stats.sim.csv``."""
        instruction_counts = self.sim_df["instructions"]
        return instruction_counts.sum()
4 changes: 4 additions & 0 deletions gpucachesim/stats/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ class Stats(abc.ABC):
@abstractmethod
def cycles(self) -> int:
    """Return the cycle count reported by this statistics source."""

@abstractmethod
def instructions(self) -> int:
    """Return the instruction count reported by this statistics source."""
47 changes: 33 additions & 14 deletions gpucachesim/stats/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,26 @@


class Stats(common.Stats):
bench_config: BenchConfig
config: GPUConfig

def __init__(self, config: GPUConfig, bench_config: BenchConfig) -> None:
    """Load the native profiling results referenced by ``bench_config``.

    Reads ``profile.commands.json`` and ``profile.metrics.json`` from the
    directory given by ``bench_config["profile"]["profile_dir"]``.
    """
    self.path = Path(bench_config["profile"]["profile_dir"])
    with open(self.path / "profile.commands.json", "rb") as f:
        self.commands = json.load(f)
    # Parse the metrics file once. ``metrics`` is kept as an alias of ``df``
    # so code using either attribute name sees the same table.
    self.df = pd.read_json(self.path / "profile.metrics.json")
    self.metrics = self.df
    self.use_duration = False
    self.bench_config = bench_config
    self.config = config

def duration_us(self):
    """Return the summed duration in microseconds.

    Uses the ``Duration`` column when present, otherwise
    ``gpu__time_duration.sum_nsecond`` converted from nanoseconds.

    Raises:
        ValueError: if neither duration column is present in ``self.df``.
    """
    if "Duration" in self.df:
        # This column is already in microseconds; no conversion needed.
        return self.df["Duration"].sum()
    if "gpu__time_duration.sum_nsecond" in self.df:
        # Convert nanoseconds to microseconds.
        return self.df["gpu__time_duration.sum_nsecond"].sum() * 1e-3
    raise ValueError("missing duration")

Expand All @@ -35,24 +39,39 @@ def cycles(self) -> int:
# duration is us, so *1e-6
# unit conversions cancel each other out
duration = self.hw_duration_us()
return duration * self.config["clock_speed"]
return duration * self.config.core_clock_speed
else:
# sm_efficiency: The percentage of time at least one warp
# is active on a specific multiprocessor
# mean_sm_efficiency = self.metrics["sm_efficiency"].mean() / 100.0
# mean_sm_efficiency = self.df["sm_efficiency"].mean() / 100.0
# num_active_sm = self.data.config.spec["sm_count"] * mean_sm_efficiency
# print("num active sms", num_active_sm)

# nsight_col = "sm__cycles_elapsed.sum_cycle"
nsight_col = "gpc__cycles_elapsed.avg_cycle"
# nsight_col = "sm__cycles_active.avg_cycle"
pprint(list(self.metrics.columns.tolist()))
if "elapsed_cycles_sm" in self.metrics:
sm_count = self.config["sm_count"]
cycles = self.metrics["elapsed_cycles_sm"].sum()
return cycles / sm_count
elif nsight_col in self.metrics:
return self.metrics[nsight_col].sum()
# pprint(list(self.df.columns.tolist()))
if "elapsed_cycles_sm" in self.df:
sm_count = self.config.num_total_cores
# sm_count = self.config.num_clusters
# print(self.df["elapsed_cycles_sm"]["value"])
cycles = self.df["elapsed_cycles_sm"].sum()
# this only holds until we have repetitions
assert (cycles == self.df["elapsed_cycles_sm"]["value"]).all()
return int(cycles / sm_count)
elif nsight_col in self.df:
return self.df[nsight_col].sum()
else:
raise ValueError("hw dataframe missing cycles")
# hw_value *= mean_sm_efficiency

def instructions(self):
    """Return the instruction count derived from the profiler metrics table.

    Prefers the ``inst_issued`` column and falls back to
    ``smsp__inst_executed.sum_inst``.

    Raises:
        ValueError: if neither column is present in ``self.df``.
    """
    metrics = self.df

    if "inst_issued" in metrics:
        # An ``inst_executed`` metric also exists.
        issued_total = metrics["inst_issued"].sum()
        # NOTE(review): the factor 20 is hard-coded; presumably it stands in
        # for ``self.config.num_total_cores`` -- confirm before relying on it.
        return issued_total * 20

    if "smsp__inst_executed.sum_inst" in metrics:
        # Related metrics: ``sm__inst_executed.sum_inst`` and
        # ``sm__sass_thread_inst_executed.sum_inst``.
        return metrics["smsp__inst_executed.sum_inst"].sum()

    raise ValueError("missing instructions")
Loading

0 comments on commit 5c6a25a

Please sign in to comment.