From ae764c301921be3096df7d582252d3f5e1f33238 Mon Sep 17 00:00:00 2001 From: Marek Materzok Date: Fri, 26 Jan 2024 10:16:28 +0100 Subject: [PATCH] Transaction profiles (#547) --- .github/workflows/main.yml | 4 +- .gitignore | 1 + docs/development-environment.md | 33 +++++ requirements-dev.txt | 2 + scripts/run_tests.py | 4 + scripts/tprof.py | 84 +++++++++++++ test/common/__init__.py | 1 + test/common/infrastructure.py | 32 ++++- test/common/profiler.py | 85 +++++++++++++ test/transactions/test_methods.py | 5 +- transactron/core.py | 5 + transactron/profiler.py | 194 ++++++++++++++++++++++++++++++ 12 files changed, 442 insertions(+), 8 deletions(-) create mode 100755 scripts/tprof.py create mode 100644 test/common/profiler.py create mode 100644 transactron/profiler.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4227aae64..f450a431b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -246,8 +246,8 @@ jobs: - name: Run tests run: ./scripts/run_tests.py --verbose - - name: Check traces - run: ./scripts/run_tests.py -t -c 1 TestCore + - name: Check traces and profiles + run: ./scripts/run_tests.py -t -p -c 1 TestCore lint: name: Check code formatting and typing diff --git a/.gitignore b/.gitignore index b545b086c..a27638d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ venv.bak/ # Tests outputs test/__traces__ +test/__profiles__/*.json # cocotb build /test/regression/cocotb/build diff --git a/docs/development-environment.md b/docs/development-environment.md index cfdb58cad..610fe0610 100644 --- a/docs/development-environment.md +++ b/docs/development-environment.md @@ -40,6 +40,7 @@ The `run_tests.py` script has the following options: * `-l`, `--list` -- lists available tests. This option is helpful, e.g., to find a name of a test generated using the `parameterized` package. * `-t`, `--trace` -- generates waveforms in the `vcd` format and `gtkw` files for the `gtkwave` tool. The files are saved in the `test/__traces__/` directory. Useful for debugging and test-driven development. +* `-p`, `--profile` -- generates Transactron execution profile information, which can then be read by the script `tprof.py`. The files are saved in the `test/__profile__/` directory. Useful for analyzing performance. * `-v`, `--verbose` -- makes the test runner more verbose. It will, for example, print the names of all the tests being run. ### lint.sh @@ -76,3 +77,35 @@ The `core_graph.py` script has the following options: ### build\_docs.sh Generates local documentation using [Sphinx](https://www.sphinx-doc.org/). The generated HTML files are located in `build/html`. + +### tprof.py + +Processes Transactron profile files and presents them in a readable way. +To generate a profile file, the `run_tests.py` script should be used with the `--profile` option. +The `tprof.py` can then be run as follows: + +``` +scripts/tprof.py test/__profile__/profile_file.json +``` + +This displays the profile information about transactions by default. +For method profiles, one should use the `--mode=methods` option. + +The columns have the following meaning: + +* `name` -- the name of the transaction or method in question. The method names are displayed together with the containing module name to differentiate between identically named methods in different modules. +* `source location` -- the file and line where the transaction or method was declared. Used to further disambiguate transaction/methods. +* `locked` -- for methods, shows the number of cycles the method was locked by the caller (called with a false condition). For transactions, shows the number of cycles the transaction could run, but was forced to wait by another, conflicting, transaction. +* `run` -- shows the number of cycles the given method/transaction was running. + +To display information about method calls, one can use the `--call-graph` option. +When displaying transaction profiles, this option produces a call graph. For each transaction, there is a tree of methods which are called by this transaction. +Counters presented in the tree shows information about the calls from the transaction in the root of the tree: if a method is also called by a different transaction, these calls are not counted. +When displaying method profiles, an inverted call graph is produced: the transactions are in the leaves, and the children nodes are the callers of the method in question. +In this mode, the `locked` field in the tree shows how many cycles a given method or transaction was responsible for locking the method in the root. + +Other options of `tprof.py` are: + +* `--sort` -- selects which column is used for sorting rows. +* `--filter-name` -- filters rows by name. Regular expressions can be used. +* `--filter-loc` -- filters rows by source locations. Regular expressions can be used. diff --git a/requirements-dev.txt b/requirements-dev.txt index 35c93666f..3e90e45be 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,3 +17,5 @@ cocotb==1.7.2 cocotb-bus==0.2.1 pytest==7.2.2 pyelftools==0.29 +dataclasses-json==0.6.3 +tabulate==0.9.0 diff --git a/scripts/run_tests.py b/scripts/run_tests.py index 0f310703a..264daa707 100755 --- a/scripts/run_tests.py +++ b/scripts/run_tests.py @@ -104,6 +104,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-l", "--list", action="store_true", help="List all tests") parser.add_argument("-t", "--trace", action="store_true", help="Dump waveforms") + parser.add_argument("-p", "--profile", action="store_true", help="Write execution profiles") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") parser.add_argument("-a", "--all", action="store_true", default=False, help="Run all tests") parser.add_argument( @@ -127,6 +128,9 @@ def main(): if args.trace: os.environ["__COREBLOCKS_DUMP_TRACES"] = "1" + if args.profile: + os.environ["__TRANSACTRON_PROFILE"] = "1" + if args.test_name: pattern = re.compile(args.test_name) unit_tests = {name: test for name, test in unit_tests.items() if pattern.search(name)} diff --git a/scripts/tprof.py b/scripts/tprof.py new file mode 100755 index 000000000..e5209299d --- /dev/null +++ b/scripts/tprof.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import argparse +import sys +import re +from pathlib import Path +from typing import Optional +from collections.abc import Callable, Iterable +from tabulate import tabulate +from dataclasses import asdict + +topdir = Path(__file__).parent.parent +sys.path.insert(0, str(topdir)) + + +from transactron.profiler import Profile, RunStat, RunStatNode # noqa: E402 + + +def process_stat_tree( + xs: Iterable[RunStatNode], recursive: bool, ret: Optional[list[tuple]] = None, depth=0 +) -> list[tuple]: + if ret is None: + ret = list[tuple]() + for x in xs: + row = asdict(x.stat) + if recursive and depth: + row["name"] = (2 * depth - 1) * "-" + " " + row["name"] + ret.append(tuple(row.values())) + if recursive and x.callers: + process_stat_tree(x.callers.values(), recursive, ret, depth + 1) + return ret + + +def filter_nodes(nodes: list[RunStatNode], key: Callable[[RunStat], str], regex: str): + pattern = re.compile(regex) + return [node for node in nodes if pattern.search(key(node.stat))] + + +def sort_node(node: RunStatNode, sort_order: str): + node.callers = dict(sorted(node.callers.items(), key=lambda node: asdict(node[1].stat)[sort_order])) + for node2 in node.callers.values(): + sort_node(node2, sort_order) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-g", "--call-graph", action="store_true", help="Show call graph") + parser.add_argument("-s", "--sort", choices=["name", "locked", "run"], default="name", help="Sort by column") + parser.add_argument( + "-m", "--mode", choices=["transactions", "methods"], default="transactions", help="Profile display mode" + ) + parser.add_argument("-f", "--filter-name", help="Filter by name, regular expressions can be used") + parser.add_argument("-l", "--filter-loc", help="Filter by source location, regular expressions can be used") + parser.add_argument("file_name", nargs=1) + + args = parser.parse_args() + + profile = Profile.decode(args.file_name[0]) + + recursive = args.call_graph + + if args.mode == "transactions": + nodes = profile.analyze_transactions(recursive=recursive) + elif args.mode == "methods": + nodes = profile.analyze_methods(recursive=recursive) + else: + assert False + + headers = ["name", "source location", "locked", "run"] + + nodes.sort(key=lambda node: asdict(node.stat)[args.sort]) + for node in nodes: + sort_node(node, args.sort) + + if args.filter_name: + nodes = filter_nodes(nodes, lambda stat: stat.name, args.filter_name) + if args.filter_loc: + nodes = filter_nodes(nodes, lambda stat: stat.src_loc, args.filter_loc) + + print(tabulate(process_stat_tree(nodes, recursive), headers=headers)) + + +if __name__ == "__main__": + main() diff --git a/test/common/__init__.py b/test/common/__init__.py index 4b44c291f..46eb4b5a1 100644 --- a/test/common/__init__.py +++ b/test/common/__init__.py @@ -2,4 +2,5 @@ from .infrastructure import * # noqa: F401 from .sugar import * # noqa: F401 from .testbenchio import * # noqa: F401 +from .profiler import * # noqa: F401 from transactron.utils import data_layout # noqa: F401 diff --git a/test/common/infrastructure.py b/test/common/infrastructure.py index 3382f9a2c..51dd3e8ce 100644 --- a/test/common/infrastructure.py +++ b/test/common/infrastructure.py @@ -9,6 +9,7 @@ from amaranth import * from amaranth.sim import * from .testbenchio import TestbenchIO +from .profiler import profiler_process, Profile from .functions import TestGen from ..gtkw_extension import write_vcd_ext from transactron import Method @@ -140,12 +141,18 @@ def _wrapping_function(self): class PysimSimulator(Simulator): - def __init__(self, module: HasElaborate, max_cycles: float = 10e4, add_transaction_module=True, traces_file=None): + def __init__( + self, + module: HasElaborate, + max_cycles: float = 10e4, + add_transaction_module=True, + traces_file=None, + clk_period=1e-6, + ): test_module = _TestModule(module, add_transaction_module) - tested_module = test_module.tested_module + self.tested_module = tested_module = test_module.tested_module super().__init__(test_module) - clk_period = 1e-6 self.add_clock(clk_period) if isinstance(tested_module, HasDebugSignals): @@ -206,13 +213,30 @@ def run_simulation(self, module: HasElaborate, max_cycles: float = 10e4, add_tra if "__COREBLOCKS_DUMP_TRACES" in os.environ: traces_file = unittest.TestCase.id(self) + clk_period = 1e-6 sim = PysimSimulator( - module, max_cycles=max_cycles, add_transaction_module=add_transaction_module, traces_file=traces_file + module, + max_cycles=max_cycles, + add_transaction_module=add_transaction_module, + traces_file=traces_file, + clk_period=clk_period, ) self.add_all_mocks(sim, sys._getframe(2).f_locals) yield sim + + profile = None + if "__TRANSACTRON_PROFILE" in os.environ and isinstance(sim.tested_module, TransactionModule): + profile = Profile() + sim.add_sync_process(profiler_process(sim.tested_module.transactionManager, profile, clk_period)) + res = sim.run() + if profile is not None: + profile_dir = "test/__profiles__" + profile_file = unittest.TestCase.id(self) + os.makedirs(profile_dir, exist_ok=True) + profile.encode(f"{profile_dir}/{profile_file}.json") + self.assertTrue(res, "Simulation time limit exceeded") def tick(self, cycle_cnt: int = 1): diff --git a/test/common/profiler.py b/test/common/profiler.py new file mode 100644 index 000000000..58c236153 --- /dev/null +++ b/test/common/profiler.py @@ -0,0 +1,85 @@ +import os.path +from amaranth.sim import * +from transactron.core import MethodMap, TransactionManager +from transactron.profiler import CycleProfile, Profile, ProfileInfo +from transactron.utils import SrcLoc +from .functions import TestGen + +__all__ = ["profiler_process"] + + +def profiler_process(transaction_manager: TransactionManager, profile: Profile, clk_period: float): + def process() -> TestGen: + method_map = MethodMap(transaction_manager.transactions) + cgr, _, _ = TransactionManager._conflict_graph(method_map) + id_map = dict[int, int]() + id_seq = 0 + + def get_id(obj): + try: + return id_map[id(obj)] + except KeyError: + nonlocal id_seq + id_seq = id_seq + 1 + id_map[id(obj)] = id_seq + return id_seq + + def local_src_loc(src_loc: SrcLoc): + return (os.path.relpath(src_loc[0]), src_loc[1]) + + for transaction in method_map.transactions: + profile.transactions_and_methods[get_id(transaction)] = ProfileInfo( + transaction.owned_name, local_src_loc(transaction.src_loc), True + ) + + for method in method_map.methods: + profile.transactions_and_methods[get_id(method)] = ProfileInfo( + method.owned_name, local_src_loc(method.src_loc), False + ) + + yield Passive() + while True: + yield Delay((1 - 1e-4) * clk_period) # shorter than one clock cycle + + cprof = CycleProfile() + profile.cycles.append(cprof) + + for transaction in method_map.transactions: + request = yield transaction.request + runnable = yield transaction.runnable + grant = yield transaction.grant + + if grant: + cprof.running[get_id(transaction)] = None + elif request and runnable: + for transaction2 in cgr[transaction]: + if (yield transaction2.grant): + cprof.locked[get_id(transaction)] = get_id(transaction2) + + running = set(cprof.running) + for method in method_map.methods: + if (yield method.run): + running.add(get_id(method)) + + locked_methods = set[int]() + for method in method_map.methods: + if get_id(method) not in running: + if any(get_id(transaction) in running for transaction in method_map.transactions_by_method[method]): + locked_methods.add(get_id(method)) + + for method in method_map.methods: + if get_id(method) in running: + for t_or_m in method_map.method_parents[method]: + if get_id(t_or_m) in running: + cprof.running[get_id(method)] = get_id(t_or_m) + elif get_id(method) in locked_methods: + caller = next( + get_id(t_or_m) + for t_or_m in method_map.method_parents[method] + if get_id(t_or_m) in running or get_id(t_or_m) in locked_methods + ) + cprof.locked[get_id(method)] = caller + + yield + + return process diff --git a/test/transactions/test_methods.py b/test/transactions/test_methods.py index 22c3dab45..838199e0a 100644 --- a/test/transactions/test_methods.py +++ b/test/transactions/test_methods.py @@ -612,7 +612,6 @@ def process(): yield self.circ.req_t2.eq(req_t2) yield self.circ.ready.eq(m_ready) yield Settle() - yield Delay(1e-8) out_m = yield self.circ.out_m out_t1 = yield self.circ.out_t1 @@ -632,8 +631,10 @@ def process(): self.assertTrue(in1 != self.bad_number or not out_t1) self.assertTrue(in2 != self.bad_number or not out_t2) + yield + with self.run_simulation(self.circ, 100) as sim: - sim.add_process(process) + sim.add_sync_process(process) def test_random_arg(self): self.base_random(lambda arg: arg.data != self.bad_number) diff --git a/transactron/core.py b/transactron/core.py index 98daa8b76..0fd6fc29e 100644 --- a/transactron/core.py +++ b/transactron/core.py @@ -76,6 +76,7 @@ def __init__(self, transactions: Iterable["Transaction"]): self.methods_by_transaction = dict[Transaction, list[Method]]() self.transactions_by_method = defaultdict[Method, list[Transaction]](list) self.readiness_by_method_and_transaction = dict[tuple[Transaction, Method], ValueLike]() + self.method_parents = defaultdict[Method, list[TransactionBase]](list) def rec(transaction: Transaction, source: TransactionBase): for method, (arg_rec, _) in source.method_uses.items(): @@ -92,6 +93,10 @@ def rec(transaction: Transaction, source: TransactionBase): self.methods_by_transaction[transaction] = [] rec(transaction, transaction) + for transaction_or_method in self.methods_and_transactions: + for method in transaction_or_method.method_uses.keys(): + self.method_parents[method].append(transaction_or_method) + def transactions_for(self, elem: TransactionOrMethod) -> Collection["Transaction"]: if isinstance(elem, Transaction): return [elem] diff --git a/transactron/profiler.py b/transactron/profiler.py new file mode 100644 index 000000000..410538ac4 --- /dev/null +++ b/transactron/profiler.py @@ -0,0 +1,194 @@ +from collections import defaultdict +from typing import Optional +from dataclasses import dataclass, field +from transactron.utils import SrcLoc +from dataclasses_json import dataclass_json + + +__all__ = ["ProfileInfo", "Profile"] + + +@dataclass_json +@dataclass +class ProfileInfo: + """Information about transactions and methods. In `Profile`, transactions + and methods are referred to by their unique ID numbers. + + Attributes + ---------- + name : str + The name. + src_loc : SrcLoc + Source location. + is_transaction : bool + If true, this object describes a transaction; if false, a method. + """ + + name: str + src_loc: SrcLoc + is_transaction: bool + + +@dataclass +class RunStat: + """Collected statistics about a transaction or method. + + Attributes + ---------- + name : str + The name. + src_loc : SrcLoc + Source location. + locked : int + For methods: the number of cycles this method was locked because of + a disabled call (a call under a false condition). For transactions: + the number of cycles this transaction was ready to run, but did not + run because a conflicting transaction has run instead. + """ + + name: str + src_loc: str + locked: int = 0 + run: int = 0 + + @staticmethod + def make(info: ProfileInfo): + return RunStat(info.name, f"{info.src_loc[0]}:{info.src_loc[1]}") + + +@dataclass +class RunStatNode: + """A statistics tree. Summarizes call graph information. + + Attributes + ---------- + stat : RunStat + Statistics. + callers : dict[int, RunStatNode] + Statistics for the method callers. For transactions, this is empty. + """ + + stat: RunStat + callers: dict[int, "RunStatNode"] = field(default_factory=dict) + + @staticmethod + def make(info: ProfileInfo): + return RunStatNode(RunStat.make(info)) + + +@dataclass_json +@dataclass +class CycleProfile: + """Profile information for a single clock cycle. + + Transactions and methods are referred to by unique IDs. + + Attributes + ---------- + locked : dict[int, int] + For each transaction which didn't run because of a conflict, the + transaction which has run instead. For each method which was used + but didn't run because of a disabled call, the caller which + used it. + running : dict[int, Optional[int]] + For each running method, its caller. Running transactions don't + have a caller (the value is `None`). + """ + + locked: dict[int, int] = field(default_factory=dict) + running: dict[int, Optional[int]] = field(default_factory=dict) + + +@dataclass_json +@dataclass +class Profile: + """Transactron execution profile. + + Can be saved by the simulator, and then restored by an analysis tool. + In the profile data structure, methods and transactions are referred to + by their unique ID numbers. + + Attributes + ---------- + transactions_and_methods : dict[int, ProfileInfo] + Information about transactions and methods indexed by ID numbers. + cycles : list[CycleProfile] + Profile information for each cycle of the simulation. + """ + + transactions_and_methods: dict[int, ProfileInfo] = field(default_factory=dict) + cycles: list[CycleProfile] = field(default_factory=list) + + def encode(self, file_name: str): + with open(file_name, "w") as fp: + fp.write(self.to_json()) # type: ignore + + @staticmethod + def decode(file_name: str) -> "Profile": + with open(file_name, "r") as fp: + return Profile.from_json(fp.read()) # type: ignore + + def analyze_transactions(self, recursive=False) -> list[RunStatNode]: + stats = {i: RunStatNode.make(info) for i, info in self.transactions_and_methods.items() if info.is_transaction} + + def rec(c: CycleProfile, node: RunStatNode, i: int): + if i in c.running: + node.stat.run += 1 + elif i in c.locked: + node.stat.locked += 1 + if recursive: + for j in called[i]: + if j not in node.callers: + node.callers[j] = RunStatNode.make(self.transactions_and_methods[j]) + rec(c, node.callers[j], j) + + for c in self.cycles: + called = defaultdict[int, set[int]](set) + + for i, j in c.running.items(): + if j is not None: + called[j].add(i) + + for i, j in c.locked.items(): + called[j].add(i) + + for i in c.running: + if i in stats: + rec(c, stats[i], i) + + for i in c.locked: + if i in stats: + stats[i].stat.locked += 1 + + return list(stats.values()) + + def analyze_methods(self, recursive=False) -> list[RunStatNode]: + stats = { + i: RunStatNode.make(info) for i, info in self.transactions_and_methods.items() if not info.is_transaction + } + + def rec(c: CycleProfile, node: RunStatNode, i: int, locking_call=False): + if i in c.running: + if not locking_call: + node.stat.run += 1 + else: + node.stat.locked += 1 + caller = c.running[i] + else: + node.stat.locked += 1 + caller = c.locked[i] + if recursive and caller is not None: + if caller not in node.callers: + node.callers[caller] = RunStatNode.make(self.transactions_and_methods[caller]) + rec(c, node.callers[caller], caller, locking_call) + + for c in self.cycles: + for i in c.running: + if i in stats: + rec(c, stats[i], i) + + for i in c.locked: + if i in stats: + rec(c, stats[i], i, locking_call=True) + + return list(stats.values())