From ac87ef918962d8fcea68dd97cc801f09c9444e06 Mon Sep 17 00:00:00 2001
From: oripress <theoripress@gmail.com>
Date: Sat, 19 Nov 2022 12:37:41 +0100
Subject: [PATCH] CCC: Continuously Changing Corruptions (#50)

Co-authored-by: oripress <ori.press@bethgelab.org>
Co-authored-by: Steffen Schneider <steffen@bethgelab.org>
Co-authored-by: Alexander Panfilov <39771221+kotekjedi@users.noreply.github.com>
Co-authored-by: zimmerrol <rzrolandzimmermann@gmail.com>
---
 requirements.txt                         |   6 +-
 requirements_dev.txt                     |   2 +
 setup.cfg                                |  26 +-
 shifthappens/tasks/__init__.py           |   1 +
 shifthappens/tasks/ccc/__init__.py       |   1 +
 shifthappens/tasks/ccc/ccc.py            |  98 ++++
 shifthappens/tasks/ccc/ccc_imagenet_c.py | 625 +++++++++++++++++++++++
 shifthappens/tasks/ccc/ccc_lmdb.py       | 132 +++++
 shifthappens/tasks/ccc/ccc_utils.py      | 500 ++++++++++++++++++
 9 files changed, 1387 insertions(+), 4 deletions(-)
 create mode 100644 shifthappens/tasks/ccc/__init__.py
 create mode 100644 shifthappens/tasks/ccc/ccc.py
 create mode 100644 shifthappens/tasks/ccc/ccc_imagenet_c.py
 create mode 100644 shifthappens/tasks/ccc/ccc_lmdb.py
 create mode 100644 shifthappens/tasks/ccc/ccc_utils.py

diff --git a/requirements.txt b/requirements.txt
index ebcc420c..39297056 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,8 @@
 numpy
 torch
 torchvision
-surgeon_pytorch
\ No newline at end of file
+surgeon_pytorch
+lmdb
+pyarrow
+pandas
+pillow
\ No newline at end of file
diff --git a/requirements_dev.txt b/requirements_dev.txt
index deb4b8d4..fd8f735f 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -10,3 +10,5 @@ sphinx_autodoc_typehints==1.18.1
 sphinx_copybutton==0.5.0
 pydata_sphinx_theme==0.8.1
 interrogate==1.5.0
+pandas-stubs==1.5.1.221024
+types-Pillow==9.3.0.1
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 9fcbbe96..25db2621 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -9,9 +9,6 @@ ignore = E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W
 [mypy]
 python_version = 3.8
 
-[mypy-numpy]
-ignore_missing_imports = True
-
 [mypy-pytest]
 ignore_missing_imports = True
 
@@ -21,6 +18,27 @@ ignore_missing_imports = True
 [mypy-surgeon_pytorch]
 ignore_missing_imports = True
 
+[mypy-lmdb]
+ignore_missing_imports = True
+
+[mypy-pyarrow]
+ignore_missing_imports = True
+
+[mypy-tqdm]
+ignore_missing_imports = True
+
+[mypy-scipy.*]
+ignore_missing_imports = True
+
+[mypy-skimage.*]
+ignore_missing_imports = True
+
+[mypy-cv2]
+ignore_missing_imports = True
+
+[mypy-wand.*]
+ignore_missing_imports = True
+
 [metadata]
 name = shifthappens
 version = attr: shifthappens.__version__
@@ -66,6 +84,8 @@ dev =
     sphinx_autodoc_typehints==1.18.1
     sphinx_copybutton==0.5.0
     pydata_sphinx_theme==0.8.1
+    pandas-stubs==1.5.1.221024
+    types-Pillow==9.3.0.1
 
 
 [options.packages.find]
diff --git a/shifthappens/tasks/__init__.py b/shifthappens/tasks/__init__.py
index 7ee3a6f4..b0cf4f2b 100644
--- a/shifthappens/tasks/__init__.py
+++ b/shifthappens/tasks/__init__.py
@@ -1,5 +1,6 @@
 """Utility methods and classes for the benchmark's tasks and the individual tasks."""
 
+from shifthappens.tasks import ccc  # noqa: F401
 from shifthappens.tasks import imagenet_c  # noqa: F401
 from shifthappens.tasks import imagenet_cartoon  # noqa: F401
 from shifthappens.tasks import imagenet_d  # noqa: F401
diff --git a/shifthappens/tasks/ccc/__init__.py b/shifthappens/tasks/ccc/__init__.py
new file mode 100644
index 00000000..e2463fbc
--- /dev/null
+++ b/shifthappens/tasks/ccc/__init__.py
@@ -0,0 +1 @@
+"""The Continuously Changing Corruptions task."""
diff --git a/shifthappens/tasks/ccc/ccc.py b/shifthappens/tasks/ccc/ccc.py
new file mode 100644
index 00000000..b72aadba
--- /dev/null
+++ b/shifthappens/tasks/ccc/ccc.py
@@ -0,0 +1,98 @@
+"""CCC: Continuously Changing Corruptions
+
+.. note::
+
+    This task only implements the data reading portion of the dataset.
+    In addition to this file, we submitted a file used to generate the
+    data itself.
+"""
+import dataclasses
+
+import numpy as np
+
+import shifthappens.data.torch as sh_data_torch
+from shifthappens import benchmark as sh_benchmark
+from shifthappens.config import imagenet_validation_path
+from shifthappens.data.base import DataLoader
+from shifthappens.models import base as sh_models
+from shifthappens.models.base import PredictionTargets
+from shifthappens.tasks.base import parameter
+from shifthappens.tasks.base import Task
+from shifthappens.tasks.ccc.ccc_utils import WalkLoader
+from shifthappens.tasks.metrics import Metric
+from shifthappens.tasks.task_result import TaskResult
+
+
+@sh_benchmark.register_task(name="CCC", relative_data_folder="ccc", standalone=True)
+@dataclasses.dataclass
+class CCC(Task):
+    """
+    The main task class for the CCC task.
+    This task only implements the data reading portion of the dataset.
+    """
+
+    seed: int = parameter(
+        default=43,
+        options=(43,),
+        description="random seed used in the dataset building process",
+    )
+    frequency: int = parameter(
+        default=5000,
+        options=(5000, 20000),
+        description="represents how many images are sampled from each subset",
+    )
+    base_amount: int = parameter(
+        default=750000,
+        options=(750000,),
+        description="represents how large the base dataset is",
+    )
+    accuracy: int = parameter(
+        default=20,
+        options=(0, 20, 40),
+        description="represents the baseline accuracy of walk",
+    )
+    subset_size: int = parameter(
+        default=5000,
+        options=(5000, 50000),
+        description="represents the sample size of images sampled from ImageNet validation",
+    )
+
+    def setup(self):
+        """Load and prepare the data."""
+
+        self.loader = WalkLoader(
+            imagenet_validation_path,
+            self.data_root,
+            self.seed,
+            self.frequency,
+            self.base_amount,
+            self.accuracy,
+            self.subset_size,
+        )
+
+    def _prepare_dataloader(self) -> DataLoader:
+        data = self.loader.generate_dataset()
+        self.targets = [s[1] for s in data]
+
+        return DataLoader(
+            sh_data_torch.IndexedTorchDataset(
+                sh_data_torch.ImagesOnlyTorchDataset(data)
+            ),
+            max_batch_size=None,
+        )
+
+    def _evaluate(self, model: sh_models.Model) -> TaskResult:
+        dataloader = self._prepare_dataloader()
+
+        all_predicted_labels_list = []
+        for predictions in model.predict(
+            dataloader, PredictionTargets(class_labels=True)
+        ):
+            all_predicted_labels_list.append(predictions.class_labels)
+        all_predicted_labels = np.concatenate(all_predicted_labels_list, 0)
+
+        accuracy = (all_predicted_labels == np.array(self.targets)).mean()
+        print(f"Accuracy: {accuracy}")
+        return TaskResult(
+            accuracy=accuracy, summary_metrics={Metric.Robustness: "accuracy"}
+        )
diff --git a/shifthappens/tasks/ccc/ccc_imagenet_c.py b/shifthappens/tasks/ccc/ccc_imagenet_c.py
new file mode 100644
index 00000000..54ba73f1
--- /dev/null
+++ b/shifthappens/tasks/ccc/ccc_imagenet_c.py
@@ -0,0 +1,625 @@
+"""
+ Adapted from https://github.com/hendrycks/robustness
+
+  Copyright 2018 Dan Hendrycks and Thomas Dietterich
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+"""
+import ctypes
+import os
+import os.path
+import warnings
+from io import BytesIO
+from typing import Callable
+from typing import Dict
+
+import cv2
+import numpy as np
+import skimage as sk
+from PIL import Image as PILImage
+from scipy import interpolate
+from scipy.ndimage import zoom as scizoom
+from scipy.ndimage.interpolation import map_coordinates
+from skimage.filters import gaussian
+from wand.api import library as wandlibrary
+from wand.image import Image as WandImage
+
+warnings.simplefilter("ignore", UserWarning)
+
+
+# /////////////// Data Loader ///////////////
+
+interpolation_function_dict: Dict[str, Callable] = dict()
+
+
+def is_image_file(filename):
+    """Tells whether a path looks like an image file.
+    Args:
+        filename (string): path to a file
+    Returns:
+        bool: True if the filename ends with a known image extension
+    """
+    known_extensions = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm")
+    # str.endswith accepts a tuple of suffixes, so one call tests them all;
+    # lower-casing first keeps the check case-insensitive.
+    lowered = filename.lower()
+    return lowered.endswith(known_extensions)
+
+
+# /////////////// Distortion Helpers ///////////////
+
+
+def __disk(radius, alias_blur=0.1, dtype=np.float32):
+    if radius <= 8:
+        L = np.arange(-8, 8 + 1)
+        ksize = (3, 3)
+    else:
+        L = np.arange(-radius, radius + 1)
+        ksize = (5, 5)
+    X, Y = np.meshgrid(L, L)
+    aliased_disk = np.array((X**2 + Y**2) <= radius**2, dtype=dtype)
+    aliased_disk /= np.sum(aliased_disk)
+
+    # supersample disk to antialias
+    return cv2.GaussianBlur(aliased_disk, ksize=ksize, sigmaX=alias_blur)
+
+
+# Tell Python about the C method
+wandlibrary.MagickMotionBlurImage.argtypes = (
+    ctypes.c_void_p,  # wand
+    ctypes.c_double,  # radius
+    ctypes.c_double,  # sigma
+    ctypes.c_double,
+)  # angle
+
+
+class MotionImage(WandImage):
+    """Extend wand.image.Image class to include method signature"""
+
+    def motion_blur(self, radius=0.0, sigma=0.0, angle=0.0):
+        wandlibrary.MagickMotionBlurImage(self.wand, radius, sigma, angle)
+
+
+# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py
+def __plasma_fractal(mapsize=256, wibbledecay=3):
+    """
+    Generate a heightmap using diamond-square algorithm.
+    Return square 2d array, side length 'mapsize', of floats in range 0-1.
+    'mapsize' must be a power of two; 'wibble' shrinks by 'wibbledecay' per level.
+    """
+    assert mapsize & (mapsize - 1) == 0
+    maparray = np.empty((mapsize, mapsize), dtype=np.float64)
+    maparray[0, 0] = 0
+    stepsize = mapsize
+    wibble = 100
+
+    def __wibbledmean(array):
+        return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape)
+
+    def __fillsquares():
+        """For each square of points stepsize apart,
+        calculate middle value as mean of points + wibble"""
+        cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
+        squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0)
+        squareaccum += np.roll(squareaccum, shift=-1, axis=1)
+        maparray[
+            stepsize // 2 : mapsize : stepsize, stepsize // 2 : mapsize : stepsize
+        ] = __wibbledmean(squareaccum)
+
+    def __filldiamonds():
+        """For each diamond of points stepsize apart,
+        calculate middle value as mean of points + wibble"""
+        mapsize = maparray.shape[0]
+        drgrid = maparray[
+            stepsize // 2 : mapsize : stepsize, stepsize // 2 : mapsize : stepsize
+        ]
+        ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
+        ldrsum = drgrid + np.roll(drgrid, 1, axis=0)
+        lulsum = ulgrid + np.roll(ulgrid, -1, axis=1)
+        ltsum = ldrsum + lulsum
+        maparray[
+            0:mapsize:stepsize, stepsize // 2 : mapsize : stepsize
+        ] = __wibbledmean(ltsum)
+        tdrsum = drgrid + np.roll(drgrid, 1, axis=1)
+        tulsum = ulgrid + np.roll(ulgrid, -1, axis=0)
+        ttsum = tdrsum + tulsum
+        maparray[
+            stepsize // 2 : mapsize : stepsize, 0:mapsize:stepsize
+        ] = __wibbledmean(ttsum)
+
+    while stepsize >= 2:
+        __fillsquares()
+        __filldiamonds()
+        stepsize //= 2
+        wibble /= wibbledecay
+
+    maparray -= maparray.min()
+    return maparray / maparray.max()
+
+
+def __clipped_zoom(img, zoom_factor):
+    h = img.shape[0]
+    # ceil crop height(= crop width)
+    ch = int(np.ceil(h / zoom_factor))
+
+    top = (h - ch) // 2
+    img = scizoom(
+        img[top : top + ch, top : top + ch], (zoom_factor, zoom_factor, 1), order=1
+    )
+    # trim off any extra pixels
+    trim_top = (img.shape[0] - h) // 2
+
+    return img[trim_top : trim_top + h, trim_top : trim_top + h]
+
+
+# /////////////// End Distortion Helpers ///////////////
+
+
+# /////////////// Distortions ///////////////
+def __gaussian_noise(x, severity=1):
+    if "gaussian noise" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [0.0, 0.08, 0.12, 0.18, 0.26, 0.38],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["gaussian noise"] = f
+
+    f = interpolation_function_dict["gaussian noise"]
+
+    c = f(severity)
+
+    x = np.array(x) / 255.0
+    return np.clip(x + np.random.normal(size=x.shape, scale=c), 0, 1) * 255
+
+
+def __shot_noise(x, severity=1):
+    if "shot noise" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [
+                0,
+                float(1) / 60,
+                float(1) / 25,
+                float(1) / 12,
+                float(1) / 5,
+                float(1) / 3,
+            ],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["shot noise"] = f
+
+    f = interpolation_function_dict["shot noise"]
+
+    c = f(severity)
+    if c != 0:
+        c = float(1) / c
+    else:
+        c = 9999
+
+    x = np.array(x) / 255.0
+    return np.clip(np.random.poisson(x * c) / c, 0, 1) * 255
+
+
+def __impulse_noise(x, severity=1):
+    if "impulse noise" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5], [0, 0.03, 0.06, 0.09, 0.17, 0.27], axis=0, kind="linear"
+        )
+        interpolation_function_dict["impulse noise"] = f
+
+    f = interpolation_function_dict["impulse noise"]
+
+    c = f(severity)
+
+    x = sk.util.random_noise(np.array(x) / 255.0, mode="s&p", amount=c)
+    return np.clip(x, 0, 1) * 255
+
+
+def __glass_blur(x, severity=1):
+    if "glass blur" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [
+                (0.0, 0.0, 0.0),
+                (0.7, 1, 2),
+                (0.9, 2, 1),
+                (1, 2, 3),
+                (1.1, 3, 2),
+                (1.5, 4, 2),
+            ],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["glass blur"] = f
+
+    f = interpolation_function_dict["glass blur"]
+
+    c = f(severity)
+
+    if c[1] < 1:
+        c[1] = 1
+
+    x = np.uint8(gaussian(np.array(x) / 255.0, sigma=c[0], multichannel=True) * 255)
+
+    # locally shuffle pixels
+    for i in range(round(c[2])):
+        for h in range(224 - round(c[1]), round(c[1]), -1):
+            for w in range(224 - round(c[1]), round(c[1]), -1):
+                dx, dy = np.random.randint(-round(c[1]), round(c[1]), size=(2,))
+                h_prime, w_prime = h + dy, w + dx
+                # swap
+                x[h, w], x[h_prime, w_prime] = x[h_prime, w_prime], x[h, w]
+
+    return np.clip(gaussian(x / 255.0, sigma=c[0], multichannel=True), 0, 1) * 255
+
+
+def __defocus_blur(x, severity=1):
+    if "defocus blur" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [(0.0, 0.0), (3, 0.1), (4, 0.5), (6, 0.5), (8, 0.5), (10, 0.5)],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["defocus blur"] = f
+
+    f = interpolation_function_dict["defocus blur"]
+
+    c = f(severity)
+
+    x = np.array(x) / 255.0
+    kernel = __disk(radius=c[0], alias_blur=c[1])
+
+    channels = []
+    for d in range(3):
+        channels.append(cv2.filter2D(x[:, :, d], -1, kernel))
+    channels = np.array(channels).transpose((1, 2, 0))  # 3x224x224 -> 224x224x3
+
+    return np.clip(channels, 0, 1) * 255
+
+
+def __motion_blur(x, severity=1):
+    """Blur ``x`` (needs a PIL-style ``save``) at a random angle; returns RGB array."""
+    if "motion blur" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [(0.0, 0.0), (10, 3), (15, 5), (15, 8), (15, 12), (20, 15)],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["motion blur"] = f
+
+    # c = (radius, sigma), linearly interpolated so severity may be fractional
+    c = interpolation_function_dict["motion blur"](severity)
+
+    output = BytesIO()
+    x.save(output, format="PNG")
+    x = MotionImage(blob=output.getvalue())
+
+    x.motion_blur(radius=c[0], sigma=c[1], angle=np.random.uniform(-45, 45))
+    # np.frombuffer replaces the deprecated binary mode of np.fromstring
+    x = cv2.imdecode(np.frombuffer(x.make_blob(), np.uint8), cv2.IMREAD_UNCHANGED)
+
+    if x.shape != (224, 224):
+        return np.clip(x[..., [2, 1, 0]], 0, 255)  # BGR to RGB
+    else:  # greyscale to RGB
+        return np.clip(np.array([x, x, x]).transpose((1, 2, 0)), 0, 255)
+
+
+def __zoom_blur(x, severity=1):
+    if "zoom blur" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [
+                (1.0, 1.0, 0.01),
+                (1, 1.11, 0.01),
+                (1, 1.16, 0.01),
+                (1, 1.21, 0.02),
+                (1, 1.26, 0.02),
+                (1, 1.31, 0.03),
+            ],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["zoom blur"] = f
+
+    f = interpolation_function_dict["zoom blur"]
+
+    c = f(severity)
+    c = np.arange(c[0], c[1], c[2])
+
+    x = (np.array(x) / 255.0).astype(np.float32)
+    out = np.zeros_like(x)
+    for zoom_factor in c:
+        out += __clipped_zoom(x, zoom_factor)
+
+    x = (x + out) / (len(c) + 1)
+    return np.clip(x, 0, 1) * 255
+
+
+def __fog(x, severity=1):
+    if "fog" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [(0.0, 2.0), (1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["fog"] = f
+
+    f = interpolation_function_dict["fog"]
+
+    c = f(severity)
+    x = np.array(x) / 255.0
+    max_val = x.max()
+    x += c[0] * __plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis]
+    return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255
+
+
+def __frost(x, severity=1, data_dir=None):
+    if "frost" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [(1.0, 0.0), (1, 0.4), (0.8, 0.6), (0.7, 0.7), (0.65, 0.7), (0.6, 0.75)],
+            axis=0,
+            kind="linear",
+        )
+
+        interpolation_function_dict["frost"] = f
+
+    f = interpolation_function_dict["frost"]
+
+    c = f(severity)
+
+    idx = np.random.randint(5)
+    filename = [
+        "./frost/frost1.png",
+        "./frost/frost2.png",
+        "./frost/frost3.png",
+        "./frost/frost4.jpg",
+        "./frost/frost5.jpg",
+        "./frost/frost6.jpg",
+    ][idx]
+    # NOTE(review): randint(5) yields 0-4 only, so frost6.jpg is never selected.
+    filename = os.path.join(data_dir, filename)
+    frost = cv2.imread(os.path.abspath(filename))
+    # data_dir must be given by the caller: os.path.join rejects the None default.
+    # randomly crop a 224x224 patch and reverse the channel order (to rgb)
+    x_start, y_start = np.random.randint(0, frost.shape[0] - 224), np.random.randint(
+        0, frost.shape[1] - 224
+    )
+    frost = frost[x_start : x_start + 224, y_start : y_start + 224][..., [2, 1, 0]]
+
+    return np.clip(c[0] * np.array(x) + c[1] * frost, 0, 255)
+
+
+def __snow(x, severity=1):
+    """Add motion-blurred random snow to ``x``; returns an array scaled to 0-255."""
+    if "snow" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [
+                (0.1, 0.3, 3, 1.0, 10, 4, 1.0),
+                (0.1, 0.3, 3, 0.5, 10, 4, 0.8),
+                (0.2, 0.3, 2, 0.5, 12, 4, 0.7),
+                (0.55, 0.3, 4, 0.9, 12, 8, 0.7),
+                (0.55, 0.3, 4.5, 0.85, 12, 8, 0.65),
+                (0.55, 0.3, 2.5, 0.85, 12, 12, 0.55),
+            ],
+            axis=0,
+            kind="linear",
+        )
+        interpolation_function_dict["snow"] = f
+
+    # c = (mean, std, zoom, threshold, blur radius, blur sigma, blend weight)
+    c = interpolation_function_dict["snow"](severity)
+
+    x = np.array(x, dtype=np.float32) / 255.0
+    snow_layer = np.random.normal(
+        size=x.shape[:2], loc=c[0], scale=c[1]
+    )  # [:2] for monochrome
+
+    snow_layer = __clipped_zoom(snow_layer[..., np.newaxis], c[2])
+    snow_layer[snow_layer < c[3]] = 0
+
+    snow_layer = PILImage.fromarray(
+        (np.clip(snow_layer.squeeze(), 0, 1) * 255).astype(np.uint8), mode="L"
+    )
+    output = BytesIO()
+    snow_layer.save(output, format="PNG")
+    snow_layer = MotionImage(blob=output.getvalue())
+
+    snow_layer.motion_blur(radius=c[4], sigma=c[5], angle=np.random.uniform(-135, -45))
+
+    snow_layer = (
+        cv2.imdecode(
+            np.frombuffer(snow_layer.make_blob(), np.uint8), cv2.IMREAD_UNCHANGED
+        )
+        / 255.0
+    )
+    snow_layer = snow_layer[..., np.newaxis]
+
+    x = c[6] * x + (1 - c[6]) * np.maximum(
+        x, cv2.cvtColor(x, cv2.COLOR_RGB2GRAY).reshape(224, 224, 1) * 1.5 + 0.5
+    )
+    return np.clip(x + snow_layer + np.rot90(snow_layer, k=2), 0, 1) * 255
+
+
+def __contrast(x, severity=1):
+    if "contrast" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5], [1.0, 0.4, 0.3, 0.2, 0.1, 0.05], axis=0, kind="linear"
+        )
+        interpolation_function_dict["contrast"] = f
+
+    f = interpolation_function_dict["contrast"]
+    c = f(severity)
+
+    x = np.array(x) / 255.0
+    means = np.mean(x, axis=(0, 1), keepdims=True)
+    return np.clip((x - means) * c + means, 0, 1) * 255
+
+
+def __brightness(x, c):
+    if "brightness" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5], [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], kind="linear"
+        )
+        interpolation_function_dict["brightness"] = f
+
+    f = interpolation_function_dict["brightness"]
+    c = f(c)
+
+    x = np.array(x) / 255.0
+    x = sk.color.rgb2hsv(x)
+    x[:, :, 2] = np.clip(x[:, :, 2] + c, 0, 1)
+    x = sk.color.hsv2rgb(x)
+
+    return np.clip(x, 0, 1) * 255
+
+
+def __jpeg_compression(x, severity=1):
+    if "jpeg" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5], [85, 25, 18, 15, 10, 7], axis=0, kind="linear"
+        )
+        interpolation_function_dict["jpeg"] = f
+
+    f = interpolation_function_dict["jpeg"]
+    c = f(severity)
+
+    c = round(c.item())
+    output = BytesIO()
+
+    x.save(output, "JPEG", quality=c)
+    x = PILImage.open(output)
+    return x
+
+
+def __pixelate(x, severity=1):
+    if "pixelate" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5], [1.0, 0.6, 0.5, 0.4, 0.3, 0.25], axis=0, kind="linear"
+        )
+        interpolation_function_dict["pixelate"] = f
+
+    f = interpolation_function_dict["pixelate"]
+    c = f(severity)
+
+    x = x.resize((int(224 * c), int(224 * c)), PILImage.BOX)
+    x = x.resize((224, 224), PILImage.BOX)
+    return x
+
+
+def __elastic_transform(image, severity=1):
+    if "elastic_transform" not in interpolation_function_dict.keys():
+        f = interpolate.interp1d(
+            [0, 1, 2, 3, 4, 5],
+            [
+                (0, 999, 0),
+                (244 * 2, 244 * 0.7, 244 * 0.1),
+                (244 * 2, 244 * 0.08, 244 * 0.2),
+                (244 * 0.05, 244 * 0.01, 244 * 0.02),
+                (244 * 0.07, 244 * 0.01, 244 * 0.02),
+                (244 * 0.12, 244 * 0.01, 244 * 0.02),
+            ],
+            axis=0,
+            kind="linear",
+        )
+
+        #     c = [(244 * 2, 244 * 0.7, 244 * 0.1),   # 244 should have been 224, but ultimately nothing is incorrect
+        #          (244 * 2, 244 * 0.08, 244 * 0.2),
+        #          (244 * 0.05, 244 * 0.01, 244 * 0.02),
+        #          (244 * 0.07, 244 * 0.01, 244 * 0.02),
+        #          (244 * 0.12, 244 * 0.01, 244 * 0.02)][severity - 1]
+
+        interpolation_function_dict["elastic_transform"] = f
+
+    f = interpolation_function_dict["elastic_transform"]
+    c = f(severity)
+
+    image = np.array(image, dtype=np.float32) / 255.0
+    shape = image.shape
+    shape_size = shape[:2]
+
+    # random affine
+    center_square = np.float32(shape_size) // 2
+    square_size = min(shape_size) // 3
+    pts1 = np.float32(
+        [
+            center_square + square_size,
+            [center_square[0] + square_size, center_square[1] - square_size],
+            center_square - square_size,
+        ]
+    )
+    pts2 = pts1 + np.random.uniform(-c[2], c[2], size=pts1.shape).astype(np.float32)
+    M = cv2.getAffineTransform(pts1, pts2)
+    image = cv2.warpAffine(
+        image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101
+    )
+
+    dx = (
+        gaussian(
+            np.random.uniform(-1, 1, size=shape[:2]), c[1], mode="reflect", truncate=3
+        )
+        * c[0]
+    ).astype(np.float32)
+    dy = (
+        gaussian(
+            np.random.uniform(-1, 1, size=shape[:2]), c[1], mode="reflect", truncate=3
+        )
+        * c[0]
+    ).astype(np.float32)
+    dx, dy = dx[..., np.newaxis], dy[..., np.newaxis]
+
+    x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
+    indices = (
+        np.reshape(y + dy, (-1, 1)),
+        np.reshape(x + dx, (-1, 1)),
+        np.reshape(z, (-1, 1)),
+    )
+    return (
+        np.clip(
+            map_coordinates(image, indices, order=1, mode="reflect").reshape(shape),
+            0,
+            1,
+        )
+        * 255
+    )
+
+
+# /////////////// End Distortions ///////////////
+
+
+def noise_transforms() -> Dict[str, Callable]:
+    """Returns a dictionary of noise transforms."""
+    return {
+        "gaussian_noise": __gaussian_noise,
+        "shot_noise": __shot_noise,
+        "impulse_noise": __impulse_noise,
+        "defocus_blur": __defocus_blur,
+        "glass_blur": __glass_blur,
+        "motion_blur": __motion_blur,
+        "zoom_blur": __zoom_blur,
+        "snow": __snow,
+        "frost": __frost,
+        "fog": __fog,
+        "brightness": __brightness,
+        "contrast": __contrast,
+        "elastic": __elastic_transform,
+        "pixelate": __pixelate,
+        "jpeg": __jpeg_compression,
+    }
diff --git a/shifthappens/tasks/ccc/ccc_lmdb.py b/shifthappens/tasks/ccc/ccc_lmdb.py
new file mode 100644
index 00000000..a1474353
--- /dev/null
+++ b/shifthappens/tasks/ccc/ccc_lmdb.py
@@ -0,0 +1,132 @@
+"""
+Code copied from: https://github.com/rmccorm4/PyTorch-LMDB
+"""
+import os
+
+import lmdb
+import pyarrow as pa
+import six
+import torch.utils.data as data
+import tqdm
+from PIL import Image
+from torch.utils.data import DataLoader
+
+
+class ImageFolderLMDB(data.Dataset):
+    """
+    Reads (image bytes, label) samples from an LMDB database
+    """
+
+    def __init__(self, db_path, transform=None, target_transform=None):
+        self.db_path = db_path
+        self.env = lmdb.open(
+            db_path,
+            subdir=os.path.isdir(db_path),
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False,
+        )
+        with self.env.begin(write=False) as txn:
+            # the sample count and the key list live under two reserved keys
+            self.length = pa.deserialize(txn.get(b"__len__"))
+            self.keys = pa.deserialize(txn.get(b"__keys__"))
+
+        self.transform = transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        env = self.env
+        with env.begin(write=False) as txn:
+            byteflow = txn.get(self.keys[index])
+        unpacked = pa.deserialize(byteflow)
+
+        # decode the stored image bytes into an RGB PIL image
+        imgbuf = unpacked[0]
+        buf = six.BytesIO()
+        buf.write(imgbuf)
+        buf.seek(0)
+        img = Image.open(buf).convert("RGB")
+
+        # the label is the second element of the stored record
+        target = unpacked[1]
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return img, target
+
+    def __len__(self):
+        return self.length
+
+    def __repr__(self):
+        return self.__class__.__name__ + " (" + self.db_path + ")"
+
+
+def dumps_pyarrow(obj):
+    """
+    Serialize an object with ``pa.serialize`` and return the resulting buffer.
+    NOTE(review): ``pa.serialize`` is deprecated in newer pyarrow - confirm.
+    Returns:
+        Implementation-dependent bytes-like object
+    """
+    return pa.serialize(obj).to_buffer()
+
+
+def dset2lmdb(dataset, outpath, subset_size):
+    """
+    Saves a given dataset in LMDB format
+
+    Parameters
+    ----------
+    dataset :
+        dataset whose samples are (image, label) pairs
+    outpath : str
+        path to save generated files
+    subset_size: int
+        amount of images in dataset; selects the LMDB map size below
+    """
+    data_loader = DataLoader(dataset, num_workers=0, collate_fn=lambda x: x)
+
+    lmdb_path = os.path.expanduser(outpath)
+    isdir = os.path.isdir(lmdb_path)
+
+    if subset_size == 50000:
+        map_size = 2048 * 2048 * 512  # allocates 2GB
+    elif subset_size == 5000:
+        map_size = 2048 * 2048 * 128  # allocates 512MB
+    else:
+        map_size = 2048 * 2048 * 2048 * 256  # allocates 2TB
+
+    db = lmdb.open(
+        lmdb_path,
+        subdir=isdir,
+        map_size=map_size,
+        readonly=False,
+        meminit=False,
+        map_async=True,
+    )
+
+    txn = db.begin(write=True)
+    for idx, sample in tqdm.tqdm(
+        enumerate(data_loader),
+        total=len(dataset),
+        desc="Generate LMDB to %s" % lmdb_path,
+    ):
+        image, label = sample[0]
+        txn.put(f"{idx}".encode("ascii"), dumps_pyarrow((image, label)))
+
+    # samples are committed first; keys and length go into a second transaction
+    txn.commit()
+    keys = [f"{k}".encode("ascii") for k in range(len(data_loader))]
+    with db.begin(write=True) as txn:
+        txn.put(b"__keys__", dumps_pyarrow(keys))
+        txn.put(b"__len__", dumps_pyarrow(len(keys)))
+
+    print("Flushing database ...")
+    db.sync()
+    db.close()
+    print("Closing")
diff --git a/shifthappens/tasks/ccc/ccc_utils.py b/shifthappens/tasks/ccc/ccc_utils.py
new file mode 100644
index 00000000..ee0a889c
--- /dev/null
+++ b/shifthappens/tasks/ccc/ccc_utils.py
@@ -0,0 +1,500 @@
+import io
+import itertools
+import os
+import pickle
+import random
+from io import BytesIO
+
+import numpy as np
+import pandas as pd
+import requests
+import torch
+import torch.utils.data as data
+import torchvision.transforms as tv_transforms
+from PIL import Image
+
+from shifthappens.tasks.ccc.ccc_imagenet_c import noise_transforms
+from shifthappens.tasks.ccc.ccc_lmdb import dset2lmdb
+from shifthappens.tasks.ccc.ccc_lmdb import ImageFolderLMDB
+
+
+def path_to_dataset(data_dir, root):
+    """
+    Maps a walk through the severity grid to the directories of its subsets
+
+    Every grid index is divided by 4 to obtain the severity, so an entry (1, 2)
+    becomes the directory ``<root>/s1_0.25s2_0.5``.
+
+    Parameters
+    ----------
+    data_dir : list
+        each entry is a tuple of two indices that represents a severity combination
+    root : str
+        path to image dir
+
+    Returns
+    -------
+    A list of directories, one per entry of data_dir and in the same order
+    """
+    subset_dirs = []
+    for entry in data_dir:
+        # Float division keeps the decimal point in the directory name, e.g.
+        # the index 4 is written as "1.0".
+        first_severity = float(entry[0]) / 4
+        second_severity = float(entry[1]) / 4
+        name = f"s1_{first_severity}s2_{second_severity}"
+        subset_dirs.append(os.path.join(root, name))
+    return subset_dirs
+
+
+def find_path(arr, target_val):
+    """
+    Picks the walk from one noise to another whose mean accuracy is closest to target_val
+
+    A candidate walk is built for every start cell (i, 0) with i >= 1, and the
+    first candidate with the smallest |mean accuracy - target_val| is returned.
+
+    Parameters
+    ----------
+    arr : numpy array
+        each entry is an accuracy of a pretrained ResNet-50 classifier on a severity combination
+    target_val : float
+        the desired accuracy of the subsets along the path
+
+    Returns
+    -------
+    A list of (row, column) indices, that correspond to a path through the severity combinations
+    """
+    costs, paths = {}, {}
+    row_count = arr.shape[0]
+    for row in range(1, row_count):
+        costs, paths = traverse_graph(costs, paths, arr, row, 0, target_val)
+
+    smallest_gap = 99999999999
+    for row in range(1, row_count):
+        start = (row, 0)
+        gap = abs(costs[start] / len(paths[start]) - target_val)
+        if gap < smallest_gap:
+            smallest_gap, best_path = gap, paths[start]
+    return best_path
+
+
+def traverse_graph(cost_dict, path_dict, arr, i, j, target_val):
+    """
+    Memoised recursion behind find_path.
+
+    From cell (i, j) a walk may step to (i - 1, j) or to (i, j + 1); it stops
+    once it reaches row 0. Both dictionaries are updated in place and returned.
+
+    Parameters
+    ----------
+    cost_dict : dictionary containing floats
+        maps (i, j) to the summed accuracies along the walk chosen from (i, j)
+    path_dict : dictionary containing lists
+        maps (i, j) to the walk chosen from (i, j) (a walk is a list of indices)
+    arr : numpy array
+        each entry is an accuracy of a pretrained ResNet-50 classifier on a severity combination
+    i : int
+        current row index
+    j : int
+        current column index
+    target_val : float
+        the desired accuracy of the subsets along the path
+    Returns
+    -------
+    The two dictionaries, now also holding the entries for (i, j)
+    """
+    here = (i, j)
+    if j >= arr.shape[1]:
+        # Past the last column: a huge sentinel cost puts any walk through here far from target_val.
+        if here not in cost_dict:
+            cost_dict[here] = 9999999999999
+            path_dict[here] = [9999999999999]
+        return cost_dict, path_dict
+
+    if i == 0:
+        # Row 0 ends every walk, so the walk consists of this cell only.
+        if here not in cost_dict:
+            cost_dict[here] = arr[i][j]
+            path_dict[here] = [here]
+        return cost_dict, path_dict
+
+    above, right = (i - 1, j), (i, j + 1)
+    for neighbour in (above, right):
+        if neighbour not in cost_dict:
+            cost_dict, path_dict = traverse_graph(
+                cost_dict, path_dict, arr, neighbour[0], neighbour[1], target_val
+            )
+
+    def gap_via(neighbour):
+        """Gap to target_val of the mean accuracy when continuing via neighbour."""
+        total = cost_dict[neighbour] + arr[i][j]
+        return abs((total / (len(path_dict[neighbour]) + 1)) - target_val)
+
+    # Ties go to the right-hand neighbour because the comparison is strict.
+    chosen = above if gap_via(above) < gap_via(right) else right
+    cost_dict[here] = cost_dict[chosen] + arr[i][j]
+    path_dict[here] = [here] + path_dict[chosen]
+    return cost_dict, path_dict
+
+
+def get_frost_images(data_dir):
+    """
+    Downloads the frost images from the ImageNet-C repo that are still missing.
+
+    Parameters
+    ----------
+    data_dir : str
+        where the images will be saved (inside a ``frost`` sub-directory)
+
+    Raises
+    ------
+    requests.HTTPError
+        if a download is answered with an error status
+    """
+    url = "https://raw.githubusercontent.com/hendrycks/robustness/master/ImageNet-C/create_c/"
+    frost_images_path = os.path.join(data_dir, "frost")
+    os.makedirs(frost_images_path, exist_ok=True)  # creates data_dir as well
+    frost_images = {
+        "frost1.png",
+        "frost2.png",
+        "frost3.png",
+        "frost4.jpg",
+        "frost5.jpg",
+        "frost6.jpg",
+    }
+    for image_name in frost_images.difference(set(os.listdir(frost_images_path))):
+        response = requests.get(url + image_name, timeout=60)  # never hang forever
+        response.raise_for_status()  # surface HTTP errors before decoding
+        img = Image.open(BytesIO(response.content))
+        img.save(os.path.join(frost_images_path, image_name))
+
+
+class WalkLoader(data.Dataset):
+    """
+    Generates a continuous walk through noises, at a desired baseline accuracy.
+
+    Parameters
+    ----------
+    data_dir : str
+        path to image dir (these are the images that the noises will be applied to)
+    target_dir : str
+        path to put generated files in
+    seed: int
+        seed for the random number generator
+    frequency: int
+        denotes how many images will be sampled from each subset
+    base_amount: int
+        this is about the entire size of the dataset (but not actually, because we need to start and end on the same noise)
+    accuracy: int
+        desired baseline accuracy to be used (one of 0, 20 or 40)
+    subset_size: int
+        of the images in data_dir, how many should we use?
+    """
+
+    def __init__(
+        self, data_dir, target_dir, seed, frequency, base_amount, accuracy, subset_size
+    ):
+        self.data_dir = data_dir
+        self.target_dir = target_dir
+        self.seed = seed
+        self.frequency = frequency
+        self.base_amount = base_amount
+        self.accuracy = accuracy
+        self.subset_size = subset_size
+        # Every parameter combination gets its own directory below target_dir.
+        run_parts = (accuracy, seed, frequency, base_amount, subset_size)
+        self.save_dir = os.path.join(self.target_dir, *map(str, run_parts))
+        random.seed(self.seed)
+        np.random.seed(self.seed)
+        # Maps an ordered noise pair (noise1, noise2) to its walk.
+        walk_dict = {}
+
+        assert accuracy in [0, 20, 40]
+        if accuracy == 40:
+            self.single_noises = [
+                "gaussian_noise",
+                "shot_noise",
+                "impulse_noise",
+                "defocus_blur",
+                "glass_blur",
+                "motion_blur",
+                "zoom_blur",
+                "snow",
+                "frost",
+                "fog",
+                # 'brightness', # these noises aren't used for baseline accuracy=40
+                "contrast",
+                "elastic",
+                "pixelate",
+                "jpeg",
+            ]
+        if accuracy == 20:
+            self.single_noises = [
+                "gaussian_noise",
+                "shot_noise",
+                "impulse_noise",
+                "defocus_blur",
+                "glass_blur",
+                "motion_blur",
+                "zoom_blur",
+                "snow",
+                "frost",
+                "fog",
+                # 'brightness', # these noises aren't used for baseline accuracy=20
+                "contrast",
+                "elastic",
+                "pixelate",
+                # 'jpeg' # these noises aren't used for baseline accuracy=20
+            ]
+        if accuracy == 0:
+            self.single_noises = [
+                "gaussian_noise",
+                "shot_noise",
+                "impulse_noise",
+                # 'defocus_blur', # these noises aren't used for baseline accuracy=0
+                # 'glass_blur', # these noises aren't used for baseline accuracy=0
+                # 'motion_blur', # these noises aren't used for baseline accuracy=0
+                # 'zoom_blur', # these noises aren't used for baseline accuracy=0
+                # 'snow', # these noises aren't used for baseline accuracy=0
+                # 'frost', # these noises aren't used for baseline accuracy=0
+                # 'fog', # these noises aren't used for baseline accuracy=0
+                # 'brightness', # these noises aren't used for baseline accuracy=0
+                "contrast",
+                # 'elastic', # these noises aren't used for baseline accuracy=0
+                # 'pixelate', # these noises aren't used for baseline accuracy=0
+                # 'jpeg' # these noises aren't used for baseline accuracy=0
+            ]
+
+        pickle_path = os.path.join(self.target_dir, "ccc_accuracy_matrix.pickle")
+        if not os.path.exists(pickle_path):
+            url = "https://nc.mlcloud.uni-tuebingen.de/index.php/s/izTMnXkaHoNBZT4/download/ccc_accuracy_matrix.pickle"
+            # NOTE(review): unpickling executes code chosen by whoever serves
+            # this file, so the URL has to stay a trusted source.
+            accuracy_matrix = pd.read_pickle(url)
+            os.makedirs(self.target_dir, exist_ok=True)
+            with open(pickle_path, "wb") as f:
+                pickle.dump(accuracy_matrix, f)
+        else:
+            with open(pickle_path, "rb") as f:
+                accuracy_matrix = pickle.load(f)
+        get_frost_images(self.target_dir)
+        os.makedirs(self.save_dir, exist_ok=True)
+
+        for noise1, noise2 in itertools.product(self.single_noises, repeat=2):
+            if noise1 == noise2:
+                continue
+            pair_matrix = accuracy_matrix["n1_" + noise1 + "_n2_" + noise2]
+            walk_dict[(noise1, noise2)] = find_path(pair_matrix, self.accuracy)
+
+        cur_noises = random.choice(list(walk_dict))
+        walk = walk_dict[cur_noises]
+        # Name the directories after the pair the walk belongs to, not after
+        # the pair that the loop above happened to visit last.
+        data_path = os.path.join(self.save_dir, "n1_%s_n2_%s" % cur_noises)
+        walk_datasets = path_to_dataset(walk, data_path)
+
+        self.walk_dict = walk_dict
+        self.walk_ind = 0
+        self.walk = walk
+        self.walk_datasets = walk_datasets
+
+        # NOTE(review): the starting noises are drawn independently of
+        # cur_noises, so the first walk need not belong to the pair it is
+        # applied to. Left untouched because changing the draws would change
+        # the dataset produced for a given seed - confirm with the authors.
+        self.noise1 = random.choice(self.single_noises)
+        self.first_noise1 = self.noise1
+        self.noise2 = random.choice(self.single_noises)
+        while self.noise1 == self.noise2:
+            self.noise2 = random.choice(self.single_noises)
+
+        self.lastrun = 0
+
+    def _build_subset(self, path, n1, n2, s1, s2):
+        """Generates the subset for (n1, n2, s1, s2) and stores it as LMDB at path."""
+        # NOTE(review): frost_dir is save_dir although get_frost_images() filled
+        # <target_dir>/frost - confirm which directory the frost noise expects.
+        generated_subset = ApplyTransforms(
+            self.data_dir, n1, n2, s1, s2, self.subset_size, self.save_dir
+        )
+        dset2lmdb(generated_subset, path, self.subset_size)
+
+    def generate_dataset(self):
+        """Generates files, but if they already exist (or it finished generating files), returns a Dataset Object."""
+        if self.frequency <= 0:
+            raise ValueError("frequency must be positive")
+
+        total_generated = 0
+        all_data = None
+        # The transform does not depend on the subset, so it is built only once.
+        test_transform = tv_transforms.Compose(
+            [
+                tv_transforms.ToTensor(),
+                tv_transforms.Lambda(lambda x: x.permute(1, 2, 0)),
+            ]
+        )
+
+        while True:
+            temp_path = self.walk_datasets[self.walk_ind]
+            severities = os.path.normpath(os.path.basename(temp_path))  # "s1_<a>s2_<b>"
+            n1 = self.noise1
+            n2 = self.noise2
+
+            severities_split = severities.split("_")
+            s1 = float(severities_split[1][:-2])  # drops the trailing "s2"
+            s2 = float(severities_split[2])
+
+            path = os.path.join(self.save_dir, f"n1_{n1}_n2_{n2}_s1_{s1}_s2_{s2}")
+            os.makedirs(path, exist_ok=True)
+
+            if not (
+                os.path.exists(os.path.join(path, "lock.mdb"))
+                and os.path.exists(os.path.join(path, "data.mdb"))
+            ):
+                self._build_subset(path, n1, n2, s1, s2)
+
+            try:
+                ImageFolderLMDB(db_path=path, transform=test_transform)
+            except Exception:  # unreadable database: generate it again
+                self._build_subset(path, n1, n2, s1, s2)
+
+            pieces = []
+            remainder = self.frequency
+            while remainder > 0:
+                lmdb_data = ImageFolderLMDB(db_path=path, transform=test_transform)
+                if len(lmdb_data) == 0:
+                    raise RuntimeError("no samples found in %s" % path)
+                inds = np.random.permutation(len(lmdb_data))[:remainder]
+                pieces.append(torch.utils.data.Subset(lmdb_data, inds))
+                remainder -= len(inds)
+            # Keep every draw; a subset smaller than frequency needs several.
+            if len(pieces) == 1:
+                cur_data = pieces[0]
+            else:
+                cur_data = torch.utils.data.ConcatDataset(pieces)
+
+            if all_data is not None:
+                all_data = torch.utils.data.ConcatDataset([all_data, cur_data])
+            else:
+                all_data = cur_data
+
+            total_generated += self.frequency
+            if self.walk_ind == len(self.walk) - 1:
+                self.noise1 = self.noise2
+
+                if total_generated > self.base_amount and self.lastrun == 0:
+                    if self.noise1 != self.first_noise1:
+                        self.noise2 = self.first_noise1
+                        self.lastrun = 1
+                    else:
+                        return all_data
+                elif self.lastrun == 1:
+                    return all_data
+                else:
+                    while self.noise1 == self.noise2:
+                        self.noise2 = random.choice(self.single_noises)
+
+                self.walk = self.walk_dict[(self.noise1, self.noise2)]
+                data_path = os.path.join(
+                    self.save_dir, "n1_" + self.noise1 + "_n2_" + self.noise2
+                )
+                self.walk_datasets = path_to_dataset(self.walk, data_path)
+                self.walk_ind = 0
+            else:
+                self.walk_ind += 1
+
+
+class ApplyTransforms(data.Dataset):
+    """
+    Applies the desired noise transforms to a dataset. In our case, we apply 2 ImageNet-C noises at two severities.
+
+    Items are (JPEG-encoded bytes of the corrupted image, class index) pairs.
+
+    Parameters
+    ----------
+    data_dir : str
+        path to image dir (these are the images that the noises will be applied to)
+    n1 : str
+        name of noise #1, a key of the dictionary returned by noise_transforms()
+    n2 : str
+        name of noise #2, a key of the dictionary returned by noise_transforms()
+    s1: float
+        denotes the severity of noise #1 (the noise is skipped unless s1 > 0)
+    s2: float
+        denotes the severity of noise #2 (the noise is skipped unless s2 > 0)
+    subset_size: int
+        of the images in data_dir, how many should we use?
+    frost_dir: str
+        directory of the frost images, used to noise images with frost
+    """
+
+    def __init__(self, data_dir, n1, n2, s1, s2, subset_size, frost_dir):
+        d = noise_transforms()
+        self.data_dir = data_dir
+        # Only the frost noise is called with frost_dir as an extra argument.
+        self.n1_frost = n1 == "frost"
+        self.n2_frost = n2 == "frost"
+        self.n1 = d[n1]
+        self.n2 = d[n2]
+        self.s1 = s1
+        self.s2 = s2
+        self.frost_dir = frost_dir
+
+        self.trn = tv_transforms.Compose(
+            [tv_transforms.Resize(256), tv_transforms.CenterCrop(224)]
+        )
+
+        # A sorted listing makes the shuffle reproducible for a fixed numpy seed.
+        all_paths = self._list_files(self.data_dir)
+        np.random.shuffle(all_paths)
+        self.paths = all_paths[:subset_size]
+
+        class_to_index = self._class_to_index(self.data_dir)
+        self.targets = []
+        for cur_path in self.paths:
+            # The class of an image is the name of the directory that contains it.
+            cur_class = os.path.basename(os.path.dirname(os.path.abspath(cur_path)))
+            if cur_class not in class_to_index:
+                raise ValueError(f"{cur_path} is not inside a class directory")
+            self.targets.append(class_to_index[cur_class])
+
+    @staticmethod
+    def _list_files(data_dir):
+        """Lists every file below data_dir in an order independent of the file system."""
+        all_paths = []
+        for path, dirs, files in os.walk(data_dir):
+            dirs.sort()  # os.walk descends into the directories in this order
+            all_paths.extend(os.path.join(path, name) for name in sorted(files))
+        return all_paths
+
+    @staticmethod
+    def _class_to_index(data_dir):
+        """Maps every entry of data_dir to its position in the sorted listing."""
+        return {name: idx for idx, name in enumerate(sorted(os.listdir(data_dir)))}
+
+    def __getitem__(self, index):
+        path = self.paths[index]
+        target = self.targets[index]
+
+        with open(path, "rb") as f:
+            img = Image.open(f)
+            img = img.convert("RGB")
+        img = self.trn(img)
+
+        # Noise #1 is applied before noise #2; a severity of 0 skips the noise.
+        for noise, severity, is_frost in (
+            (self.n1, self.s1, self.n1_frost),
+            (self.n2, self.s2, self.n2_frost),
+        ):
+            if severity > 0:
+                extra = (self.frost_dir,) if is_frost else ()
+                img = Image.fromarray(np.uint8(noise(img, severity, *extra)))
+
+        output = io.BytesIO()
+        img.save(output, format="JPEG", quality=85, optimize=True)
+        corrupted_img = output.getvalue()
+        return corrupted_img, target
+
+    def __len__(self):
+        return len(self.paths)