
Commit

Merge pull request #14 from LarsKue/experiments
More Modules, Metrics, Bugfixes, and more
LarsKue authored Oct 27, 2023
2 parents 91b59a8 + 935ffdd commit ee66bdd
Showing 45 changed files with 769 additions and 655 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
@@ -9,8 +9,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: [ubuntu-latest, windows-latest]
- python-version: ["3.10"]
+ os: [ubuntu-latest, windows-latest, macos-latest]
+ python-version: ["3.10", "3.11"]

steps:
- uses: actions/checkout@v3
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -9,7 +9,7 @@ description = "A light-weight lightning_trainable module for pytorch-lightning."
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
keywords = ["Machine Learning", "PyTorch", "PyTorch-Lightning"]
keywords = ["Machine-Learning", "PyTorch", "PyTorch-Lightning"]

authors = [
{ name = "Lars Kühmichel", email = "[email protected]" }
@@ -57,7 +57,9 @@ tests = [
]

experiments = [
- # these are just recommended packages to run experiments
+ # required
+ "ray[tune] ~= 2.4",
+ # recommended
"numpy ~= 1.24",
"matplotlib ~= 3.7",
"jupyterlab ~= 3.6",
6 changes: 6 additions & 0 deletions src/lightning_trainable/callbacks/epoch_progress_bar.py
@@ -2,7 +2,11 @@
from lightning.pytorch.callbacks import ProgressBar
from lightning.pytorch.callbacks.progress.tqdm_progress import Tqdm

from lightning_trainable.utils import deprecate


@deprecate("EpochProgressBar causes issues when continuing training or using multi-GPU. "
"Use the default Lightning ProgressBar instead.")
class EpochProgressBar(ProgressBar):
def __init__(self):
super().__init__()
@@ -25,6 +29,8 @@ def on_train_end(self, trainer, pl_module):
self.bar.close()


@deprecate("StepProgressBar causes issues when continuing training or using multi-GPU. "
"Use the default Lightning ProgressBar instead.")
class StepProgressBar(ProgressBar):
def __init__(self):
super().__init__()
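A quick, hedged sketch of the intended effect. Both the import path and the warning behaviour are assumptions not shown in this diff; the sketch assumes the deprecate decorator emits a DeprecationWarning when a decorated class is instantiated:

# Sketch only; import location and warning timing are assumptions.
import warnings

from lightning_trainable.callbacks import EpochProgressBar

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    bar = EpochProgressBar()  # presumably triggers the deprecation message above

assert any(issubclass(w.category, DeprecationWarning) for w in caught)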
1 change: 1 addition & 0 deletions src/lightning_trainable/datasets/core/__init__.py
@@ -1 +1,2 @@
from .distribution_dataset import DistributionDataset
from .joint import JointDataset, JointIterableDataset
5 changes: 5 additions & 0 deletions src/lightning_trainable/hparams/attribute_dict.py
@@ -14,3 +14,8 @@ def __getattribute__(self, item):

def __setattr__(self, key, value):
self[key] = value

def copy(self):
# copies of AttributeDicts should be AttributeDicts
# see also https://github.com/LarsKue/lightning-trainable/issues/13
return self.__class__(**super().copy())
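A minimal sketch of the behaviour this change guards, assuming AttributeDict is importable from lightning_trainable.hparams and accepts keyword construction:

# Sketch only; the import location is an assumption.
from lightning_trainable.hparams import AttributeDict

d = AttributeDict(lr=1e-3, batch_size=32)
c = d.copy()

assert isinstance(c, AttributeDict)  # previously a plain dict came back (issue #13)
assert c.lr == d.lr                  # attribute access still works on the copy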
3 changes: 3 additions & 0 deletions src/lightning_trainable/hparams/types/choice.py
@@ -11,6 +11,9 @@ def __call__(cls, *choices):
namespace = {"choices": choices}
return type(name, bases, namespace)

def __repr__(cls):
return f"Choice{cls.choices!r}"


class Choice(metaclass=ChoiceMeta):
"""
3 changes: 3 additions & 0 deletions src/lightning_trainable/hparams/types/range.py
@@ -21,6 +21,9 @@ def __call__(cls, lower: float | int, upper: float | int, exclude: str | None =
namespace = {"lower": lower, "upper": upper, "exclude": exclude}
return type(name, bases, namespace)

def __repr__(self):
return f"Range({self.lower!r}, {self.upper!r}, exclude={self.exclude!r})"


class Range(metaclass=RangeMeta):
"""
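For illustration, a hedged sketch of what the two new __repr__ methods produce, assuming Choice and Range are importable from lightning_trainable.hparams:

# Sketch only; the import path is an assumption.
from lightning_trainable.hparams import Choice, Range

print(repr(Choice("adam", "sgd")))  # Choice('adam', 'sgd')
print(repr(Range(0.0, 1.0)))        # Range(0.0, 1.0, exclude=None)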
3 changes: 3 additions & 0 deletions src/lightning_trainable/metrics/__init__.py
@@ -1 +1,4 @@
from .accuracy import accuracy
from .error import error
from .sinkhorn import sinkhorn_auto as sinkhorn
from .wasserstein import wasserstein
6 changes: 6 additions & 0 deletions src/lightning_trainable/metrics/error.py
@@ -0,0 +1,6 @@

from .accuracy import accuracy


def error(logits, targets, *, k=1):
return 1.0 - accuracy(logits, targets, k=k)
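A brief usage sketch relating the new error metric to top-k accuracy; the tensor shapes are illustrative and the return type of accuracy is assumed to be a scalar:

# Sketch only.
import torch

from lightning_trainable.metrics import accuracy, error

logits = torch.randn(8, 10)            # batch of 8 samples, 10 classes
targets = torch.randint(0, 10, (8,))   # integer class labels

top1_error = error(logits, targets, k=1)
top1_accuracy = accuracy(logits, targets, k=1)
print(top1_error, top1_accuracy)       # the two always sum to 1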
107 changes: 107 additions & 0 deletions src/lightning_trainable/metrics/sinkhorn.py
@@ -0,0 +1,107 @@
import warnings

import torch
from torch import Tensor

import torch.nn.functional as F
import numpy as np


def sinkhorn(a: Tensor, b: Tensor, cost: Tensor, epsilon: float, steps: int = 1000) -> Tensor:
"""
Computes the Sinkhorn optimal transport plan from sample weights of two distributions.
This version does not use log-space computations, but allows for zero or negative weights.
@param a: Sample weights from the first distribution in shape (n,)
@param b: Sample weights from the second distribution in shape (m,)
@param cost: Cost matrix in shape (n, m).
@param epsilon: Entropic regularization parameter.
@param steps: Number of iterations.
"""
if cost.shape != (len(a), len(b)):
raise ValueError(f"Expected cost to have shape {(len(a), len(b))}, but got {cost.shape}.")

gain = torch.exp(-cost / epsilon)

if gain.mean() < 1e-30:
warnings.warn(f"Detected low bandwidth ({epsilon:.1e}) relative to cost ({cost.mean().item():.1e}). "
f"You may experience numerical instabilities. Consider increasing epsilon or using sinkhorn_log.")

# Initialize the dual variables.
u = torch.ones(len(a), dtype=a.dtype, device=a.device)
v = torch.ones(len(b), dtype=b.dtype, device=b.device)

# Compute the Sinkhorn iterations.
for _ in range(steps):
v = b / (torch.matmul(gain.T, u) + 1e-50)
u = a / (torch.matmul(gain, v) + 1e-50)

# Return the transport plan.
return u[:, None] * gain * v[None, :]


def sinkhorn_log(log_a: Tensor, log_b: Tensor, cost: Tensor, epsilon: float, steps: int = 1000) -> Tensor:
"""
Computes the Sinkhorn optimal transport plan from sample weights of two distributions.
This version uses log-space computations to avoid numerical instabilities, but disallows zero or negative weights.
@param log_a: Log sample weights from the first distribution in shape (n,)
@param log_b: Log sample weights from the second distribution in shape (m,)
@param cost: Cost matrix in shape (n, m).
@param epsilon: Entropic regularization parameter.
@param steps: Number of iterations.
"""
if cost.shape != (len(log_a), len(log_b)):
raise ValueError(f"Expected cost to have shape {(len(log_a), len(log_b))}, but got {cost.shape}.")

log_gain = -cost / epsilon

# Initialize the dual variables.
log_u = torch.zeros(len(log_a), dtype=log_a.dtype, device=log_a.device)
log_v = torch.zeros(len(log_b), dtype=log_b.dtype, device=log_b.device)

# Compute the Sinkhorn iterations.
for _ in range(steps):
log_v = log_b - torch.logsumexp(log_gain + log_u[:, None], dim=0)
log_u = log_a - torch.logsumexp(log_gain + log_v[None, :], dim=1)

plan = torch.exp(log_u[:, None] + log_gain + log_v[None, :])

if not torch.allclose(len(log_b) * plan.sum(dim=0), torch.ones(len(log_b), device=plan.device)) or not torch.allclose(len(log_a) * plan.sum(dim=1), torch.ones(len(log_a), device=plan.device)):
warnings.warn(f"Sinkhorn did not converge. Consider increasing epsilon or number of iterations.")

# Return the transport plan.
return plan


def sinkhorn_auto(x: Tensor, y: Tensor, cost: Tensor = None, epsilon: float = 1.0, steps: int = 1000) -> Tensor:
"""
Computes the Sinkhorn optimal transport plan from samples from two distributions.
See also: <cref>sinkhorn_log</cref>
@param x: Samples from the first distribution in shape (n, ...).
@param y: Samples from the second distribution in shape (m, ...).
@param cost: Optional cost matrix in shape (n, m).
If not provided, the Euclidean distance is used.
@param epsilon: Optional entropic regularization parameter.
This parameter is normalized to the half-mean of the cost matrix. This helps keep the value independent
of the data dimensionality. Note that this behaviour is exclusive to this method; sinkhorn_log only accepts
the raw entropic regularization value.
@param steps: Number of iterations.
"""
if x.shape[1:] != y.shape[1:]:
raise ValueError(f"Expected x and y to live in the same feature space, "
f"but got {x.shape[1:]} and {y.shape[1:]}.")
if cost is None:
cost = x[:, None] - y[None, :]
cost = torch.flatten(cost, start_dim=2)
cost = torch.linalg.norm(cost, dim=-1)

# Initialize epsilon independent of the data dimension (i.e. dependent on the mean cost)
epsilon = epsilon * cost.mean() / 2

# Initialize the sample weights.
log_a = torch.zeros(len(x), device=x.device) - np.log(len(x))
log_b = torch.zeros(len(y), device=y.device) - np.log(len(y))

return sinkhorn_log(log_a, log_b, cost, epsilon, steps)
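A hedged usage sketch of the re-exported sinkhorn (the sinkhorn_auto alias from the metrics __init__ above); shapes follow the docstrings, the data itself is illustrative:

# Sketch only: computes an entropic optimal-transport plan between two sample sets.
import torch

from lightning_trainable.metrics import sinkhorn  # alias for sinkhorn_auto

x = torch.randn(128, 2)   # n samples from the first distribution
y = torch.randn(256, 2)   # m samples from the second distribution

plan = sinkhorn(x, y, epsilon=1.0, steps=500)
print(plan.shape)         # torch.Size([128, 256])
print(plan.sum().item())  # total transported mass, approximately 1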
18 changes: 18 additions & 0 deletions src/lightning_trainable/metrics/wasserstein.py
@@ -0,0 +1,18 @@

import torch
from torch import Tensor

from .sinkhorn import sinkhorn_auto


def wasserstein(x: Tensor, y: Tensor, cost: Tensor = None, epsilon: float = 0.1, steps: int = 10) -> Tensor:
"""
Computes the Wasserstein distance between two distributions.
See also: <cref>sinkhorn_auto</cref>
"""
if cost is None:
cost = x[:, None] - y[None, :]
cost = torch.flatten(cost, start_dim=2)
cost = torch.linalg.norm(cost, dim=-1)

return torch.sum(sinkhorn_auto(x, y, cost, epsilon, steps) * cost)
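And a matching sketch for the new wasserstein metric, an entropy-regularized transport cost between two sets of samples; the inputs are illustrative:

# Sketch only: entropic approximation of the transport cost between two empirical distributions.
import torch

from lightning_trainable.metrics import wasserstein

x = torch.randn(128, 2)
y = torch.randn(128, 2) + 1.0   # shifted copy of the first distribution

distance = wasserstein(x, y, epsilon=0.1, steps=100)
print(distance.item())          # scalar transport cost; grows with the shift between x and y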
3 changes: 1 addition & 2 deletions src/lightning_trainable/modules/__init__.py
@@ -1,4 +1,3 @@
- from .convolutional import ConvolutionalNetwork, ConvolutionalNetworkHParams
from .fully_connected import FullyConnectedNetwork, FullyConnectedNetworkHParams
from .hparams_module import HParamsModule
- from .unet import UNet, UNetHParams, UNetBlockHParams
+ from .simple_unet import SimpleUNet, SimpleUNetHParams
5 changes: 0 additions & 5 deletions src/lightning_trainable/modules/convolutional/__init__.py

This file was deleted.

74 changes: 0 additions & 74 deletions src/lightning_trainable/modules/convolutional/block.py

This file was deleted.

25 changes: 0 additions & 25 deletions src/lightning_trainable/modules/convolutional/block_hparams.py

This file was deleted.

8 changes: 0 additions & 8 deletions src/lightning_trainable/modules/convolutional/hparams.py

This file was deleted.

28 changes: 0 additions & 28 deletions src/lightning_trainable/modules/convolutional/network.py

This file was deleted.

3 changes: 3 additions & 0 deletions src/lightning_trainable/modules/fully_connected/hparams.py
@@ -8,3 +8,6 @@ class FullyConnectedNetworkHParams(HParams):

layer_widths: list[int]
activation: str = "relu"

norm: Choice("none", "batch", "layer") = "none"
dropout: float = 0.0
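A hedged sketch of the new norm and dropout options; the remaining field names (input_dims, output_dims) are assumptions about the existing hparams and may differ:

# Sketch only; input_dims/output_dims are assumed field names, not confirmed by this diff.
from lightning_trainable.modules import FullyConnectedNetwork, FullyConnectedNetworkHParams

hparams = FullyConnectedNetworkHParams(
    input_dims=16,
    output_dims=2,
    layer_widths=[64, 64],
    activation="relu",
    norm="batch",     # Choice("none", "batch", "layer"), defaults to "none"
    dropout=0.1,      # defaults to 0.0
)
network = FullyConnectedNetwork(hparams)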