From 9b689aa5fe23852d98e76c974e62ae54b53f5c8c Mon Sep 17 00:00:00 2001
From: Nicholas Junge <n.junge@appliedai.de>
Date: Thu, 21 Mar 2024 11:27:44 +0100
Subject: [PATCH] Add transform submodule, parameter compression transform

This is a better way of compressing parameters than doing it directly in
the benchmark runner, which would take over a responsibility that belongs
to the transforms we just introduced.

Refactors `nnbench.io.transforms` -> `nnbench.transforms`, the latter being
its own submodule. This is useful to have when adding new builtin
transforms, so that they do not all have to go into a single file.
---
 docs/guides/transforms.md                     |  2 +-
 examples/transforms/transforms.py             |  2 +-
 src/nnbench/transforms/__init__.py            |  1 +
 .../{io/transforms.py => transforms/base.py}  |  0
 src/nnbench/transforms/params.py              | 48 +++++++++++++++++++
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 src/nnbench/transforms/__init__.py
 rename src/nnbench/{io/transforms.py => transforms/base.py} (100%)
 create mode 100644 src/nnbench/transforms/params.py

diff --git a/docs/guides/transforms.md b/docs/guides/transforms.md
index 200b084..381c9ef 100644
--- a/docs/guides/transforms.md
+++ b/docs/guides/transforms.md
@@ -4,7 +4,7 @@ After a successful benchmark run execution, you end up with your metrics, contex
 In general, this data is a best-effort representation of the environment and configuration the benchmarks are run in.
 
 However, in some situations, manual editing and transformation of these records is required.
-nnbench exposes the `nnbench.io.transforms` module to facilitate these transforms.
+nnbench exposes the `nnbench.transforms` module to facilitate these transforms.
 
 ## Types of transforms: 1->1 vs. N->1 vs. N->N
 
diff --git a/examples/transforms/transforms.py b/examples/transforms/transforms.py
index be4a711..c03f62c 100644
--- a/examples/transforms/transforms.py
+++ b/examples/transforms/transforms.py
@@ -4,8 +4,8 @@
 import numpy as np
 
 import nnbench
-from nnbench.io.transforms import OneToOneTransform
 from nnbench.reporter.file import FileIO
+from nnbench.transforms import OneToOneTransform
 from nnbench.types import BenchmarkRecord
 
 
diff --git a/src/nnbench/transforms/__init__.py b/src/nnbench/transforms/__init__.py
new file mode 100644
index 0000000..45d380f
--- /dev/null
+++ b/src/nnbench/transforms/__init__.py
@@ -0,0 +1 @@
+from .base import ManyToManyTransform, ManyToOneTransform, OneToOneTransform
diff --git a/src/nnbench/io/transforms.py b/src/nnbench/transforms/base.py
similarity index 100%
rename from src/nnbench/io/transforms.py
rename to src/nnbench/transforms/base.py
diff --git a/src/nnbench/transforms/params.py b/src/nnbench/transforms/params.py
new file mode 100644
index 0000000..6c8c70c
--- /dev/null
+++ b/src/nnbench/transforms/params.py
@@ -0,0 +1,48 @@
+from typing import Any, Sequence
+
+from nnbench.transforms import ManyToManyTransform, OneToOneTransform
+from nnbench.types import BenchmarkRecord
+
+
+class CompressionMixin:
+    """Mixin providing ``compress``, which reduces parameter values to native types."""
+
+    def compress(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Return a new dict in which every non-native value is replaced by its repr.
+
+        Nested dicts are compressed recursively; tuples, lists, sets and frozensets
+        element-wise, one level deep. Container subclasses (e.g. namedtuples) become
+        the matching builtin container, as their constructors may reject an iterable.
+        """
+        containers = (tuple, list, set, frozenset)
+        natives = (float, int, str, bool, bytes, complex)
+
+        def _compress_impl(val: Any) -> Any:
+            # TODO: Allow custom representations for types with formatters.
+            return val if isinstance(val, natives) else repr(val)
+
+        compressed: dict[str, Any] = {}
+        for k, v in params.items():
+            if isinstance(v, dict):
+                compressed[k] = self.compress(v)
+            else:
+                builtin = next((c for c in containers if isinstance(v, c)), None)
+                compressed[k] = builtin(map(_compress_impl, v)) if builtin else _compress_impl(v)
+        return compressed
+
+
+class ParameterCompression1to1(OneToOneTransform, CompressionMixin):
+    def apply(self, record: BenchmarkRecord) -> BenchmarkRecord:
+        """Compress the ``params`` of each benchmark in ``record`` in place; return ``record``."""
+        for benchmark in record.benchmarks:
+            benchmark["params"] = self.compress(benchmark["params"])
+        return record
+
+
+class ParameterCompressionNtoN(ManyToManyTransform, CompressionMixin):
+    def apply(self, record: Sequence[BenchmarkRecord]) -> Sequence[BenchmarkRecord]:
+        """Compress ``params`` of every benchmark in every record in place; return ``record``."""
+        for single in record:
+            for benchmark in single.benchmarks:
+                benchmark["params"] = self.compress(benchmark["params"])
+        return record