From 9b689aa5fe23852d98e76c974e62ae54b53f5c8c Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Thu, 21 Mar 2024 11:27:44 +0100 Subject: [PATCH] Add transform submodule, parameter compression transform This is a better way of compressing parameters than doing it directly in the benchmark runner, which takes responsibility away from the transforms that we just introduced. Refactors `nnbench.io.transforms` -> `nnbench.transforms`, the latter being its own submodule. This is useful to have when adding new builtin transforms, so that they do not all have to go into a single file. --- docs/guides/transforms.md | 2 +- examples/transforms/transforms.py | 2 +- src/nnbench/transforms/__init__.py | 1 + .../{io/transforms.py => transforms/base.py} | 0 src/nnbench/transforms/params.py | 48 +++++++++++++++++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 src/nnbench/transforms/__init__.py rename src/nnbench/{io/transforms.py => transforms/base.py} (100%) create mode 100644 src/nnbench/transforms/params.py diff --git a/docs/guides/transforms.md b/docs/guides/transforms.md index 200b084..381c9ef 100644 --- a/docs/guides/transforms.md +++ b/docs/guides/transforms.md @@ -4,7 +4,7 @@ After a successful benchmark run execution, you end up with your metrics, contex In general, this data is a best-effort representation of the environment and configuration the benchmarks are run in. However, in some situations, manual editing and transformation of these records is required. -nnbench exposes the `nnbench.io.transforms` module to facilitate these transforms. +nnbench exposes the `nnbench.transforms` module to facilitate these transforms. ## Types of transforms: 1->1 vs. N->1 vs. 
N->N diff --git a/examples/transforms/transforms.py b/examples/transforms/transforms.py index be4a711..c03f62c 100644 --- a/examples/transforms/transforms.py +++ b/examples/transforms/transforms.py @@ -4,8 +4,8 @@ import numpy as np import nnbench -from nnbench.io.transforms import OneToOneTransform from nnbench.reporter.file import FileIO +from nnbench.transforms import OneToOneTransform from nnbench.types import BenchmarkRecord diff --git a/src/nnbench/transforms/__init__.py b/src/nnbench/transforms/__init__.py new file mode 100644 index 0000000..45d380f --- /dev/null +++ b/src/nnbench/transforms/__init__.py @@ -0,0 +1 @@ +from .base import ManyToManyTransform, ManyToOneTransform, OneToOneTransform diff --git a/src/nnbench/io/transforms.py b/src/nnbench/transforms/base.py similarity index 100% rename from src/nnbench/io/transforms.py rename to src/nnbench/transforms/base.py
diff --git a/src/nnbench/transforms/params.py b/src/nnbench/transforms/params.py
new file mode 100644
index 0000000..6c8c70c
--- /dev/null
+++ b/src/nnbench/transforms/params.py
@@ -0,0 +1,92 @@
+from typing import Any, Sequence
+
+from nnbench.transforms import ManyToManyTransform, OneToOneTransform
+from nnbench.types import BenchmarkRecord
+
+
+class CompressionMixin:
+    """Mixin that compresses benchmark parameters into native values and reprs."""
+
+    def compress(self, params: dict[str, Any]) -> dict[str, Any]:
+        """
+        Compress a parameter dictionary into native values and string reprs.
+
+        Values of a native type (float, int, str, bool, bytes, complex) are kept
+        unchanged. The members of a tuple, list, set or frozenset are compressed
+        one by one and collected into a container of the same type. Dictionaries
+        are compressed recursively. Every other value, including ``None``, is
+        replaced by its ``repr()``.
+
+        Container members are not descended into: a member that is itself a
+        container or a dictionary is replaced by its ``repr()``.
+
+        Parameters
+        ----------
+        params: dict[str, Any]
+            The parameters to compress. The dictionary itself is not modified.
+
+        Returns
+        -------
+        dict[str, Any]
+            A new dictionary holding the compressed parameters.
+        """
+        containers = (tuple, list, set, frozenset)
+        natives = (float, int, str, bool, bytes, complex)
+        compressed: dict[str, Any] = {}
+
+        def _compress_impl(val):
+            if isinstance(val, natives):
+                # save native types without modification...
+                return val
+            # ... or return the string repr.
+            # TODO: Allow custom representations for types with formatters.
+            return repr(val)
+
+        def _rebuild(container, members):
+            container_type = type(container)
+            # A namedtuple class takes its fields as separate positional arguments,
+            # so passing it one iterable raises a TypeError. Its ``_make``
+            # classmethod is the constructor that accepts an iterable.
+            make = getattr(container_type, "_make", container_type)
+            return make(members)
+
+        for k, v in params.items():
+            if isinstance(v, containers):
+                compressed[k] = _rebuild(v, (_compress_impl(vv) for vv in v))
+            elif isinstance(v, dict):
+                compressed[k] = self.compress(v)
+            else:
+                compressed[k] = _compress_impl(v)
+
+        return compressed
+
+
+class ParameterCompression1to1(OneToOneTransform, CompressionMixin):
+    """One-to-one transform that compresses the parameters of a single record."""
+
+    def apply(self, record: BenchmarkRecord) -> BenchmarkRecord:
+        """
+        Compress the ``"params"`` entry of every benchmark in ``record``.
+
+        The benchmarks are updated in place, and the same record is returned.
+        """
+        for bm in record.benchmarks:
+            bm["params"] = self.compress(bm["params"])
+
+        return record
+
+
+class ParameterCompressionNtoN(ManyToManyTransform, CompressionMixin):
+    """Many-to-many transform that compresses the parameters of several records."""
+
+    def apply(self, record: Sequence[BenchmarkRecord]) -> Sequence[BenchmarkRecord]:
+        """
+        Compress the ``"params"`` entry of every benchmark in every given record.
+
+        The benchmarks are updated in place, and the same sequence is returned.
+        """
+        for rec in record:
+            for bm in rec.benchmarks:
+                bm["params"] = self.compress(bm["params"])
+
+        return record