diff --git a/pyproject.toml b/pyproject.toml
index 3ea17821f..628356fb5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = [
   "packaging >=24.0",
   "pandas >=2.2",
   "pint >=0.18",
-  "platformdirs >=3.2",
+  "pooch >=1.8.0",
   "pyarrow >=15.0.0", # Strongly encouraged for pandas v2.2.0+
   "pyyaml >=6.0.1",
   "scikit-learn >=0.21.3",
@@ -79,8 +79,6 @@ dev = [
   "nbval >=0.11.0",
   "pandas-stubs >=2.2",
   "pip >=24.0",
-  "platformdirs >=3.2",
-  "pooch >=1.8.0",
   "pre-commit >=3.7",
   "pylint >=3.2.4",
   "pytest >=8.0.0",
diff --git a/tests/conftest.py b/tests/conftest.py
index 95c36f4eb..e98fb784f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,10 +13,10 @@
 from xclim.core import indicator
 from xclim.core.calendar import max_doy
 from xclim.testing import helpers
+from xclim.testing.helpers import default_cache_dir  # noqa
 from xclim.testing.helpers import nimbus as _nimbus
+from xclim.testing.helpers import open_dataset as _open_dataset
 from xclim.testing.helpers import test_timeseries
-from xclim.testing.utils import default_cache_dir  # noqa
-from xclim.testing.utils import open_dataset as _open_dataset
 
 
 @pytest.fixture
@@ -26,21 +26,7 @@ def random() -> np.random.Generator:
 
 @pytest.fixture
 def tmp_netcdf_filename(tmpdir):
-    yield Path(tmpdir).joinpath("testfile.nc")
-
-
-@pytest.fixture(autouse=True, scope="session")
-def threadsafe_data_dir(tmp_path_factory):
-    yield Path(tmp_path_factory.getbasetemp().joinpath("data"))
-
-
-@pytest.fixture(autouse=True, scope="session")
-def nimbus(threadsafe_data_dir):
-    yield _nimbus(
-        data_dir=threadsafe_data_dir,
-        repo=helpers.TESTDATA_REPO_URL,
-        branch=helpers.TESTDATA_BRANCH,
-    )
+    return Path(tmpdir).joinpath("testfile.nc")
 
 
 @pytest.fixture
@@ -57,6 +43,11 @@ def _lat_series(values):
     return _lat_series
 
 
+@pytest.fixture
+def timeseries():
+    return test_timeseries
+
+
 @pytest.fixture
 def tas_series():
     """Return mean temperature time series."""
@@ -309,40 +300,30 @@ def rlus_series():
 
 
 @pytest.fixture(scope="session")
-def cmip3_day_tas(threadsafe_data_dir):
-    # xr.set_options(enable_cftimeindex=False)
-    ds = _open_dataset(
-        "cmip3/tas.sresb1.giss_model_e_r.run1.atm.da.nc",
-        cache_dir=threadsafe_data_dir,
-        branch=helpers.TESTDATA_BRANCH,
-        engine="h5netcdf",
-    )
-    yield ds.tas
-    ds.close()
+def threadsafe_data_dir(tmp_path_factory):
+    return Path(tmp_path_factory.getbasetemp().joinpath("data"))
 
 
 @pytest.fixture(scope="session")
-def get_file(nimbus):
-    def _get_session_scoped_file(file: str):
-        nimbus.fetch(file)
-
-    return _get_session_scoped_file
+def nimbus(threadsafe_data_dir):
+    return _nimbus(
+        data_dir=threadsafe_data_dir,
+        repo=helpers.TESTDATA_REPO_URL,
+        branch=helpers.TESTDATA_BRANCH,
+    )
 
 
 @pytest.fixture(scope="session")
-def open_dataset(threadsafe_data_dir):
-    def _open_session_scoped_file(
-        file: str | os.PathLike, branch: str = helpers.TESTDATA_BRANCH, **xr_kwargs
-    ):
+def open_dataset(nimbus):
+    def _open_session_scoped_file(file: str | os.PathLike, **xr_kwargs):
+        xr_kwargs.setdefault("cache", True)
         xr_kwargs.setdefault("engine", "h5netcdf")
-        return _open_dataset(
-            file, cache_dir=threadsafe_data_dir, branch=branch, **xr_kwargs
-        )
+        return _open_dataset(file, cache_dir=nimbus.path, **xr_kwargs)
 
     return _open_session_scoped_file
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def official_indicators():
     # Remove unofficial indicators (as those created during the tests, and those from YAML-built modules)
     registry_cp = indicator.registry.copy()
@@ -352,17 +333,39 @@ def official_indicators():
     return registry_cp
 
 
-@pytest.fixture(scope="function")
-def atmosds(threadsafe_data_dir) -> xr.Dataset:
+@pytest.fixture
+def lafferty_sriver_ds(nimbus) -> xr.Dataset:
+    """Get data from the Lafferty & Sriver unit test.
+
+    Notes
+    -----
+    https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/tree/main/unit_test
+    """
+    fn = nimbus.fetch(
+        "uncertainty_partitioning/seattle_avg_tas.csv",
+    )
+
+    df = pd.read_csv(fn, parse_dates=["time"]).rename(
+        columns={"ssp": "scenario", "ensemble": "downscaling"}
+    )
+
+    # Make xarray dataset
+    return xr.Dataset.from_dataframe(
+        df.set_index(["scenario", "model", "downscaling", "time"])
+    )
+
+
+@pytest.fixture
+def atmosds(nimbus) -> xr.Dataset:
+    """Get synthetic atmospheric dataset."""
     return _open_dataset(
-        threadsafe_data_dir.joinpath("atmosds.nc"),
-        cache_dir=threadsafe_data_dir,
-        branch=helpers.TESTDATA_BRANCH,
+        "atmosds.nc",
+        cache_dir=nimbus.path,
         engine="h5netcdf",
     ).load()
 
 
-@pytest.fixture(scope="function")
+@pytest.fixture(scope="session")
 def ensemble_dataset_objects() -> dict[str, str]:
     edo = dict()
     edo["nc_files_simple"] = [
@@ -378,8 +381,8 @@ def ensemble_dataset_objects() -> dict[str, str]:
     return edo
 
 
-@pytest.fixture(scope="session", autouse=True)
-def gather_session_data(threadsafe_data_dir, worker_id):
+@pytest.fixture(autouse=True, scope="session")
+def gather_session_data(request, nimbus, worker_id):
     """Gather testing data on pytest run.
 
     When running pytest with multiple workers, one worker will copy data remotely to _default_cache_dir while
@@ -389,25 +392,13 @@
     Additionally, this fixture is also used to generate the `atmosds` synthetic testing dataset.
     """
     helpers.testing_setup_warnings()
-    helpers.gather_testing_data(threadsafe_data_dir, worker_id)
-    helpers.generate_atmos(threadsafe_data_dir)
-
-
-@pytest.fixture(scope="session", autouse=True)
-def cleanup(request):
-    """Cleanup a testing file once we are finished.
-
-    This flag prevents remote data from being downloaded multiple times in the same pytest run.
- """ + helpers.gather_testing_data(nimbus.path, worker_id) + helpers.generate_atmos(nimbus.path) def remove_data_written_flag(): + """Cleanup cache folder once we are finished.""" flag = default_cache_dir.joinpath(".data_written") if flag.exists(): flag.unlink() request.addfinalizer(remove_data_written_flag) - - -@pytest.fixture -def timeseries(): - return test_timeseries diff --git a/tests/test_analog.py b/tests/test_analog.py index 72857b007..2608df226 100644 --- a/tests/test_analog.py +++ b/tests/test_analog.py @@ -58,8 +58,8 @@ def test_exact_randn(exact_randn): @pytest.mark.slow @pytest.mark.parametrize("method", xca.metrics.keys()) def test_spatial_analogs(method, open_dataset): - diss = open_dataset("SpatialAnalogs/dissimilarity") - data = open_dataset("SpatialAnalogs/indicators") + diss = open_dataset("SpatialAnalogs/dissimilarity.nc") + data = open_dataset("SpatialAnalogs/indicators.nc") target = data.sel(lat=46.1875, lon=-72.1875, time=slice("1970", "1990")) candidates = data.sel(time=slice("1970", "1990")) @@ -75,7 +75,7 @@ def test_spatial_analogs(method, open_dataset): def test_unsupported_spatial_analog_method(open_dataset): method = "KonMari" - data = open_dataset("SpatialAnalogs/indicators") + data = open_dataset("SpatialAnalogs/indicators.nc") target = data.sel(lat=46.1875, lon=-72.1875, time=slice("1970", "1990")) candidates = data.sel(time=slice("1970", "1990")) @@ -87,8 +87,8 @@ def test_unsupported_spatial_analog_method(open_dataset): def test_spatial_analogs_multi_index(open_dataset): # Test multi-indexes - diss = open_dataset("SpatialAnalogs/dissimilarity") - data = open_dataset("SpatialAnalogs/indicators") + diss = open_dataset("SpatialAnalogs/dissimilarity.nc") + data = open_dataset("SpatialAnalogs/indicators.nc") target = data.sel(lat=46.1875, lon=-72.1875, time=slice("1970", "1990")) candidates = data.sel(time=slice("1970", "1990")) diff --git a/tests/test_atmos.py b/tests/test_atmos.py index 10d5d0efe..23929550d 100644 --- a/tests/test_atmos.py +++ b/tests/test_atmos.py @@ -94,7 +94,7 @@ def test_humidex(tas_series): def test_heat_index(atmosds): - # Keep just Montreal values for summertime as we need tas > 20 degC + # Keep just Montreal values for summer as we need tas > 20 degC tas = atmosds.tasmax[1][150:170] hurs = atmosds.hurs[1][150:170] diff --git a/tests/test_indices.py b/tests/test_indices.py index a9386087f..69142a077 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -2562,12 +2562,14 @@ def test_simple(self, open_dataset, ind, exp): out = ind(ds.tas.sel(location="Victoria")) np.testing.assert_almost_equal(out[0], exp, decimal=4) - def test_indice_against_icclim(self, cmip3_day_tas): + def test_indice_against_icclim(self, open_dataset): from xclim.indicators import icclim # noqa + cmip3_tas = open_dataset("cmip3/tas.sresb1.giss_model_e_r.run1.atm.da.nc").tas + with set_options(cf_compliance="log"): - ind = xci.tg_mean(cmip3_day_tas) - icclim = icclim.TG(cmip3_day_tas) + ind = xci.tg_mean(cmip3_tas) + icclim = icclim.TG(cmip3_tas) np.testing.assert_array_equal(icclim, ind) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 54e27d823..f34691985 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -1,7 +1,6 @@ from __future__ import annotations import numpy as np -import pandas as pd import xarray as xr from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver @@ -108,19 +107,8 @@ def test_lafferty_sriver_synthetic(random): lafferty_sriver(da, sm=sm) -def 
-    seattle = get_file("uncertainty_partitioning/seattle_avg_tas.csv")
-
-    df = pd.read_csv(seattle, parse_dates=["time"]).rename(
-        columns={"ssp": "scenario", "ensemble": "downscaling"}
-    )
-
-    # Make xarray dataset
-    ds = xr.Dataset.from_dataframe(
-        df.set_index(["scenario", "model", "downscaling", "time"])
-    )
-
-    _g, u = lafferty_sriver(ds.tas)
+def test_lafferty_sriver(lafferty_sriver_ds):
+    _g, u = lafferty_sriver(lafferty_sriver_ds.tas)
 
     fu = fractional_uncertainty(u)
diff --git a/tests/test_testing_utils.py b/tests/test_testing_utils.py
index 63e0881a0..35646f12e 100644
--- a/tests/test_testing_utils.py
+++ b/tests/test_testing_utils.py
@@ -3,14 +3,14 @@
 import platform
 import sys
 from pathlib import Path
-from urllib.error import URLError
 
 import numpy as np
 import pytest
 from xarray import Dataset
 
-import xclim.testing.utils as utilities
 from xclim import __version__ as __xclim_version__
+from xclim.testing import helpers
+from xclim.testing import utils as utilities
 from xclim.testing.helpers import test_timeseries as timeseries
 
 
@@ -39,52 +39,9 @@ def file_md5_checksum(f_name):
             hash_md5.update(f.read())
         return hash_md5.hexdigest()
 
-    @pytest.mark.requires_internet
-    def test_get_failure(self, tmp_path):
-        bad_repo_address = "https://github.com/beard/of/zeus/"
-        with pytest.raises(FileNotFoundError):
-            utilities._get(
-                Path("san_diego", "60_percent_of_the_time_it_works_everytime"),
-                bad_repo_address,
-                "main",
-                tmp_path,
-            )
-
-    @pytest.mark.requires_internet
-    def test_open_dataset_with_bad_file(self, tmp_path):
-        cmip3_folder = tmp_path.joinpath("main", "cmip3")
-        cmip3_folder.mkdir(parents=True)
-
-        cmip3_file = "tas.sresb1.giss_model_e_r.run1.atm.da.nc"
-        Path(cmip3_folder, cmip3_file).write_text("This file definitely isn't right.")
-
-        cmip3_md5 = f"{cmip3_file}.md5"
-        bad_cmip3_md5 = "bc51206e6462fc8ed08fd4926181274c"
-        Path(cmip3_folder, cmip3_md5).write_text(bad_cmip3_md5)
-
-        # Check for raised warning for local file md5 sum and remote md5 sum
-        with pytest.warns(UserWarning):
-            new_cmip3_file = utilities._get(
-                Path("cmip3", cmip3_file),
-                github_url="https://github.com/Ouranosinc/xclim-testdata",
-                branch="main",
-                cache_dir=tmp_path,
-            )
-
-        # Ensure that the new cmip3 file is in the cache directory
-        assert (
-            self.file_md5_checksum(Path(cmip3_folder, new_cmip3_file)) != bad_cmip3_md5
-        )
-
-        # Ensure that the md5 file was updated at the same time
-        assert (
-            self.file_md5_checksum(Path(cmip3_folder, new_cmip3_file))
-            == Path(cmip3_folder, cmip3_md5).read_text()
-        )
-
     @pytest.mark.requires_internet
     def test_open_testdata(self):
-        ds = utilities.open_dataset(
+        ds = helpers.open_dataset(
             Path("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712"), engine="h5netcdf"
         )
         assert ds.lon.size == 128
@@ -126,22 +83,3 @@ def test_release_notes_file_not_implemented(self, tmp_path):
         temp_filename = tmp_path.joinpath("version_info.txt")
         with pytest.raises(NotImplementedError):
             utilities.publish_release_notes(style="qq", file=temp_filename)
-
-
-class TestTestingFileAccessors:
-    def test_unsafe_urls(self):
-        with pytest.raises(
-            ValueError, match="GitHub URL not secure: 'ftp://domain.does.not.exist/'."
-        ):
-            utilities.open_dataset(
-                "doesnt_exist.nc", github_url="ftp://domain.does.not.exist/"
-            )
-
-    def test_malicious_urls(self):
-        with pytest.raises(
-            URLError,
-            match="urlopen error OPeNDAP URL is not well-formed: 'doesnt_exist.nc'",
-        ):
-            utilities.open_dataset(
-                "doesnt_exist.nc", dap_url="Robert'); DROP TABLE STUDENTS; --"
-            )
diff --git a/xclim/cli.py b/xclim/cli.py
index 67a6da1eb..1df887b0d 100644
--- a/xclim/cli.py
+++ b/xclim/cli.py
@@ -11,13 +11,17 @@
 
 import click
 import xarray as xr
-from dask.diagnostics import ProgressBar
+from dask.diagnostics.progress import ProgressBar
 
 import xclim as xc
 from xclim.core.dataflags import DataQualityException, data_flags, ecad_compliant
 from xclim.core.utils import InputKind
-from xclim.testing.helpers import TESTDATA_BRANCH, populate_testing_data
-from xclim.testing.utils import _default_cache_dir, publish_release_notes, show_versions
+from xclim.testing.helpers import (
+    TESTDATA_BRANCH,
+    default_cache_dir,
+    populate_testing_data,
+)
+from xclim.testing.utils import publish_release_notes, show_versions
 
 distributed = False
 try:
@@ -169,7 +173,7 @@ def prefetch_testing_data(ctx, branch):
             f"Gathering testing data from xclim-testdata `{testdata_branch}` branch..."
         )
         click.echo(populate_testing_data(branch=testdata_branch))
-        click.echo(f"Testing data saved to `{_default_cache_dir}`.")
+        click.echo(f"Testing data saved to `{default_cache_dir}`.")
     ctx.exit()
diff --git a/xclim/testing/conftest.py b/xclim/testing/conftest.py
index 12af10934..7e175e975 100644
--- a/xclim/testing/conftest.py
+++ b/xclim/testing/conftest.py
@@ -11,12 +11,11 @@
 import pytest
 
 from xclim.testing import helpers
-from xclim.testing.utils import _default_cache_dir  # noqa
-from xclim.testing.utils import open_dataset as _open_dataset
+from xclim.testing.helpers import open_dataset as _open_dataset
 
 
 @pytest.fixture(autouse=True, scope="session")
-def threadsafe_data_dir(tmp_path_factory) -> Path:
+def threadsafe_data_dir(tmp_path_factory):
     """Return a threadsafe temporary directory for storing testing data."""
     yield Path(tmp_path_factory.getbasetemp().joinpath("data"))
diff --git a/xclim/testing/helpers.py b/xclim/testing/helpers.py
index 8b1b687a1..81d2ce31a 100644
--- a/xclim/testing/helpers.py
+++ b/xclim/testing/helpers.py
@@ -6,15 +6,14 @@
 import logging
 import os
 import re
-import shutil
-import tempfile
 import time
 import warnings
 from datetime import datetime as dt
 from pathlib import Path
 from shutil import copytree
 from sys import platform
-from urllib.error import HTTPError
+from urllib.error import HTTPError, URLError
+from urllib.parse import urljoin, urlparse
 
 import numpy as np
 import pandas as pd
@@ -23,6 +22,8 @@
 from dask.callbacks import Callback
 from filelock import FileLock
 from packaging.version import Version
+from xarray import Dataset
+from xarray import open_dataset as _open_dataset
 
 try:
     from pytest_socket import SocketBlockedError
@@ -37,11 +38,14 @@
     longwave_upwelling_radiation_from_net_downwelling,
     shortwave_upwelling_radiation_from_net_downwelling,
 )
-from xclim.testing.utils import default_cache_dir
-from xclim.testing.utils import open_dataset as _open_dataset
+
+logger = logging.getLogger("xclim")
 
 default_testdata_version = "v2023.12.14"
+"""Default version of the testing data to use when fetching datasets."""
+default_cache_dir = Path(pooch.os_cache("xclim-testdata"))
+"""Default location for the testing data cache."""
 
 TESTDATA_REPO_URL = str(
     os.getenv("XCLIM_TESTDATA_REPO_URL", "https://github.com/Ouranosinc/xclim-testdata")
 )
@@ -122,33 +126,17 @@
 $ env XCLIM_DATA_DIR="/path/to/my/data" pytest
 """
 
-DATA_UPDATES = bool(os.getenv("XCLIM_DATA_UPDATES"))
-"""Sets whether to allow updates to the testing datasets.
-
-If set to ``True``, the data files will be downloaded even if the upstream hashes do not match.
-
-Notes
------
-When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:
-
-.. code-block:: console
-
-    $ export XCLIM_DATA_UPDATES=True
-
-or setting the variable at runtime:
-
-.. code-block:: console
-
-    $ env XCLIM_DATA_UPDATES=True pytest
-"""
 
 __all__ = [
-    "DATA_UPDATES",
     "PREFETCH_TESTING_DATA",
     "TESTDATA_BRANCH",
     "add_example_file_paths",
     "assert_lazy",
+    "default_cache_dir",
     "generate_atmos",
+    "nimbus",
+    "open_dataset",
+    "populate_testing_data",
     "test_timeseries",
 ]
@@ -181,100 +169,48 @@ def testing_setup_warnings():
     )
 
 
-def load_registry(
-    file: str | Path | None = None,
-    repo: str = TESTDATA_REPO_URL,
-    branch: str = TESTDATA_BRANCH,
-) -> dict[str, str]:
+def load_registry() -> dict[str, str]:
     """Load the registry file for the test data.
 
-    Parameters
-    ----------
-    file : str or Path, optional
-        Path to the registry file. If not provided, the registry file found within the package data will be used.
-
     Returns
     -------
     dict
         Dictionary of filenames and hashes.
     """
-    remote = f"{repo}/raw/{branch}/data"
-
-    # Get registry file from package_data
-    if file is None:
-        registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))
-        if not registry_file.exists():
-            registry_file.touch()
-        url = f"{remote}/{registry_file.name}"
-        try:
-            with tempfile.TemporaryDirectory() as tempdir:
-                remote_registry_file = pooch.retrieve(
-                    url=url,
-                    known_hash=None,
-                    path=tempdir,
-                    fname="registry.txt",
-                )
-                # Check if the local registry file matches the remote registry
-                if pooch.file_hash(remote_registry_file) != pooch.file_hash(
-                    registry_file.as_posix()
-                ):
-                    warnings.warn(
-                        "Local registry file does not match remote registry file."
-                    )
-                    shutil.move(remote_registry_file, registry_file)
-        except FileNotFoundError:
-            warnings.warn(
-                "Registry file not accessible in remote repository. "
-                "Aborting file retrieval and using local registry file."
-            )
-        except SocketBlockedError:
-            warnings.warn(
-                "Testing suite is being run with `--disable-socket`. Using local registry file."
-            )
-        if not registry_file.exists():
-            raise FileNotFoundError(
-                f"Local registry file not found: {registry_file}. "
-                "Testing setup cannot proceed without registry file."
-            )
-    else:
-        registry_file = Path(file)
-        if not registry_file.exists():
-            raise FileNotFoundError(f"Registry file not found: {registry_file}")
-
-    logging.info("Registry file found: %s", registry_file)
+    registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))
+    if not registry_file.exists():
+        raise FileNotFoundError(f"Registry file not found: {registry_file}")
 
     # Load the registry file
-    registry = dict()
-    with registry_file.open() as buffer:
-        for entry in buffer.readlines():
-            registry[entry.split()[0]] = entry.split()[1]
-
+    with registry_file.open() as f:
+        registry = {line.split()[0]: line.split()[1] for line in f}
     return registry
 
 
 def nimbus(  # noqa: PR01
     data_dir: str | Path = CACHE_DIR,
-    data_updates: bool = DATA_UPDATES,
     repo: str = TESTDATA_REPO_URL,
     branch: str = TESTDATA_BRANCH,
+    data_updates: bool = True,
 ) -> pooch.Pooch:
-    """Pooch registry instance for xhydro test data.
+    """Pooch registry instance for xclim test data.
 
     Parameters
     ----------
     data_dir : str or Path
         Path to the directory where the data files are stored.
-    data_updates : bool
-        If True, allow updates to the data files.
     repo : str
         URL of the repository to use when fetching testing datasets.
     branch : str
         Branch of repository to use when fetching testing datasets.
+    data_updates : bool
+        If True, allow updates to the data files. Default is True.
 
     Returns
     -------
     pooch.Pooch
-        Pooch instance for the xhydro test data.
+        Pooch instance for the xclim test data.
 
     Notes
     -----
@@ -282,8 +218,6 @@ def nimbus(  # noqa: PR01
     - ``XCLIM_DATA_DIR``: If this environment variable is set, it will be used as the base directory to store the
       data files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise,
       the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`).
-    - ``XCLIM_DATA_UPDATES``: If this environment variable is set, then the data files will be downloaded even if the
-      upstream hashes do not match. This is useful if you want to always use the latest version of the data files.
    - ``XCLIM_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of the repository
       to use when fetching datasets. Otherwise, the default repository will be used.
     - ``XCLIM_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of the repository
@@ -302,22 +236,68 @@
     data = xr.open_dataset(example_file)
     """
     remote = f"{repo}/raw/{branch}/data"
-
     return pooch.create(
         path=data_dir,
         base_url=remote,
         version=default_testdata_version,
         version_dev=branch,
         allow_updates=data_updates,
-        registry=load_registry(repo=repo, branch=branch),
+        registry=load_registry(),
     )
 
 
+# Idea copied from raven, which borrowed it from xclim, which borrowed it from xarray, which borrowed it from Seaborn
+def open_dataset(
+    name: str | os.PathLike[str],
+    dap_url: str | None = None,
+    cache_dir: str | os.PathLike[str] = default_cache_dir,
+    **kwargs,
+) -> Dataset:
+    r"""Open a dataset from the online GitHub-like repository.
+
+    If a local copy is found then always use that to avoid network traffic.
+
+    Parameters
+    ----------
+    name : str or os.PathLike
+        Name of the file containing the dataset.
+    dap_url : str, optional
+        URL to OPeNDAP folder where the data is stored. If supplied, supersedes the local cache.
+    cache_dir : str or os.PathLike
+        The directory in which to search for and write cached data.
+    \*\*kwargs
+        For NetCDF files, keywords passed to :py:func:`xarray.open_dataset`.
+
+    Returns
+    -------
+    Dataset
+
+    See Also
+    --------
+    xarray.open_dataset
+    """
+    if dap_url:
+        try:
+            return _open_dataset(
+                audit_url(urljoin(dap_url, str(name)), context="OPeNDAP"), **kwargs
+            )
+        except (OSError, URLError):
+            msg = f"OPeNDAP file not read. Verify that the service is available: '{urljoin(dap_url, str(name))}'"
+            logger.error(msg)
+            raise
+
+    local_file = Path(cache_dir).joinpath(name)
+    try:
+        ds = _open_dataset(local_file, **kwargs)
+        return ds
+    except OSError as err:
+        raise err
+
+
 def populate_testing_data(
-    registry_file: str | Path | None = None,
     temp_folder: Path | None = None,
-    repo: str | None = None,
-    branch: str | None = None,
+    repo: str = TESTDATA_REPO_URL,
+    branch: str = TESTDATA_BRANCH,
     local_cache: Path = default_cache_dir,
 ) -> None:
     """Populate the local cache with the testing data.
@@ -329,37 +309,21 @@
     repo : str, optional
         URL of the repository to use when fetching testing datasets.
     branch : str, optional
-        Branch of hydrologie/xhydro-testdata to use when fetching testing datasets.
+        Branch of Ouranosinc/xclim-testdata to use when fetching testing datasets.
     local_cache : Path
-        Path to the local cache. Defaults to the location set by the platformdirs library.
+        The path to the local cache. Defaults to the location set by the platformdirs library.
         The testing data will be downloaded to this local cache.
 
     Returns
     -------
     None
     """
-    if repo is None:
-        _repo = TESTDATA_REPO_URL
-    else:
-        _repo = repo
-    if branch is None:
-        _branch = TESTDATA_BRANCH
-    else:
-        _branch = branch
-    if temp_folder is not None:
-        _local_cache = temp_folder
-    else:
-        _local_cache = Path(local_cache)
-
     # Create the Pooch instance
-    n = nimbus(data_dir=_local_cache, repo=_repo, branch=_branch)
-
-    # Load the registry file
-    registry = load_registry(file=registry_file, repo=_repo, branch=_branch)
+    n = nimbus(data_dir=temp_folder or local_cache, repo=repo, branch=branch)
 
     # Download the files
     errored_files = []
-    for file in registry.keys():
+    for file in load_registry():
         try:
             n.fetch(file)
         except HTTPError:
@@ -375,30 +339,28 @@
             raise SocketBlockedError(msg) from e
     else:
         logging.info("Files were downloaded successfully.")
-    finally:
-        if errored_files:
-            logging.error(
-                "The following files were unable to be downloaded: %s",
-                errored_files,
-            )
+
+    if errored_files:
+        logging.error(
+            "The following files were unable to be downloaded: %s",
+            errored_files,
+        )
 
 
-def generate_atmos(cache_dir: Path) -> dict[str, xr.DataArray]:
+def generate_atmos(cache_dir: str | os.PathLike[str] | Path) -> dict[str, xr.DataArray]:
     """Create the `atmosds` synthetic testing dataset."""
-    with _open_dataset(
+    with open_dataset(
         "ERA5/daily_surface_cancities_1990-1993.nc",
         cache_dir=cache_dir,
-        branch=TESTDATA_BRANCH,
         engine="h5netcdf",
     ) as ds:
+        rsus = shortwave_upwelling_radiation_from_net_downwelling(ds.rss, ds.rsds)
+        rlus = longwave_upwelling_radiation_from_net_downwelling(ds.rls, ds.rlds)
         tn10 = calendar.percentile_doy(ds.tasmin, per=10)
         t10 = calendar.percentile_doy(ds.tas, per=10)
         t90 = calendar.percentile_doy(ds.tas, per=90)
         tx90 = calendar.percentile_doy(ds.tasmax, per=90)
 
-        rsus = shortwave_upwelling_radiation_from_net_downwelling(ds.rss, ds.rsds)
-        rlus = longwave_upwelling_radiation_from_net_downwelling(ds.rls, ds.rlds)
-
         ds = ds.assign(
             rsus=rsus,
             rlus=rlus,
@@ -413,18 +375,19 @@ def generate_atmos(cache_dir: Path) -> dict[str, xr.DataArray]:
         ds.to_netcdf(atmos_file, engine="h5netcdf")
 
     # Give access to dataset variables by name in namespace
-    namespace = dict()
-    with _open_dataset(
-        atmos_file, branch=TESTDATA_BRANCH, cache_dir=cache_dir, engine="h5netcdf"
-    ) as ds:
-        for variable in ds.data_vars:
-            namespace[f"{variable}_dataset"] = ds.get(variable)
+    with open_dataset(atmos_file, cache_dir=cache_dir, engine="h5netcdf") as ds:
+        namespace = {f"{var}_dataset": ds[var] for var in ds.data_vars}
     return namespace
 
 
-def gather_testing_data(threadsafe_data_dir: Path, worker_id: str):
+def gather_testing_data(
+    threadsafe_data_dir: str | os.PathLike[str] | Path, worker_id: str
+):
     """Gather testing data across workers."""
-    if not default_cache_dir.exists() or PREFETCH_TESTING_DATA:
+    if (
+        not default_cache_dir.joinpath(default_testdata_version).exists()
+        or PREFETCH_TESTING_DATA
+    ):
         if PREFETCH_TESTING_DATA:
             print("`XCLIM_PREFETCH_TESTING_DATA` set. Prefetching testing data...")
         if platform == "win32":
@@ -432,7 +395,7 @@
                 "UNIX-style file-locking is not supported on Windows. "
                 "Consider running `$ xclim prefetch_testing_data` to download testing data."
             )
-        elif worker_id in ["master"]:
+        elif worker_id == "master":
             populate_testing_data(branch=TESTDATA_BRANCH)
         else:
             default_cache_dir.mkdir(exist_ok=True, parents=True)
@@ -445,29 +408,33 @@
         with test_data_being_written.acquire():
             if lockfile.exists():
                 lockfile.unlink()
-            copytree(default_cache_dir, threadsafe_data_dir)
+            copytree(default_cache_dir.joinpath(default_testdata_version), threadsafe_data_dir)
 
 
 def add_example_file_paths() -> dict[str, str | list[xr.DataArray]]:
     """Create a dictionary of relevant datasets to be patched into the xdoctest namespace."""
-    namespace: dict = dict()
-    namespace["path_to_ensemble_file"] = "EnsembleReduce/TestEnsReduceCriteria.nc"
-    namespace["path_to_pr_file"] = "NRCANdaily/nrcan_canada_daily_pr_1990.nc"
-    namespace["path_to_sfcWind_file"] = "ERA5/daily_surface_cancities_1990-1993.nc"
-    namespace["path_to_tas_file"] = "ERA5/daily_surface_cancities_1990-1993.nc"
-    namespace["path_to_tasmax_file"] = "NRCANdaily/nrcan_canada_daily_tasmax_1990.nc"
-    namespace["path_to_tasmin_file"] = "NRCANdaily/nrcan_canada_daily_tasmin_1990.nc"
+    namespace = {
+        "path_to_ensemble_file": "EnsembleReduce/TestEnsReduceCriteria.nc",
+        "path_to_pr_file": "NRCANdaily/nrcan_canada_daily_pr_1990.nc",
+        "path_to_sfcWind_file": "ERA5/daily_surface_cancities_1990-1993.nc",
+        "path_to_tas_file": "ERA5/daily_surface_cancities_1990-1993.nc",
+        "path_to_tasmax_file": "NRCANdaily/nrcan_canada_daily_tasmax_1990.nc",
+        "path_to_tasmin_file": "NRCANdaily/nrcan_canada_daily_tasmin_1990.nc",
+        "path_to_example_py": (
+            Path(__file__).parent.parent.parent.parent
+            / "docs"
+            / "notebooks"
+            / "example.py"
+        ),
+    }
 
     # For core.utils.load_module example
-    namespace["path_to_example_py"] = (
-        Path(__file__).parent.parent.parent.parent / "docs" / "notebooks" / "example.py"
-    )
-
-    time = xr.cftime_range("1990-01-01", "2049-12-31", freq="D")
+    sixty_years = xr.cftime_range("1990-01-01", "2049-12-31", freq="D")
     namespace["temperature_datasets"] = [
         xr.DataArray(
-            12 * np.random.random_sample(time.size) + 273,
-            coords={"time": time},
+            12 * np.random.random_sample(sixty_years.size) + 273,
+            coords={"time": sixty_years},
             name="tas",
             dims=("time",),
             attrs={
@@ -477,8 +444,8 @@ def add_example_file_paths() -> dict[str, str | list[xr.DataArray]]:
             },
         ),
         xr.DataArray(
-            12 * np.random.random_sample(time.size) + 273,
-            coords={"time": time},
+            12 * np.random.random_sample(sixty_years.size) + 273,
+            coords={"time": sixty_years},
             name="tas",
             dims=("time",),
             attrs={
@@ -551,3 +518,24 @@ def _raise_on_compute(dsk: dict):
 
 assert_lazy = Callback(start=_raise_on_compute)
 """Context manager that raises an AssertionError if any dask computation is triggered."""
+
+
+def audit_url(url: str, context: str | None = None) -> str:
+    """Check if the URL is well-formed.
+
+    Raises
+    ------
+    URLError
+        If the URL is not well-formed.
+ """ + msg = "" + result = urlparse(url) + if result.scheme == "http": + msg = f"{context if context else ''} URL is not using secure HTTP: '{url}'".strip() + if not all([result.scheme, result.netloc]): + msg = f"{context if context else ''} URL is not well-formed: '{url}'".strip() + + if msg: + logger.error(msg) + raise URLError(msg) + return url diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index b396c4a99..6120582f3 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -16,18 +16,6 @@ from io import StringIO from pathlib import Path from typing import TextIO -from urllib.error import HTTPError, URLError -from urllib.parse import urljoin, urlparse -from urllib.request import urlretrieve - -import pooch -from xarray import Dataset -from xarray import open_dataset as _open_dataset - -try: - from pytest_socket import SocketBlockedError -except ImportError: - SocketBlockedError = None _xclim_deps = [ "xclim", @@ -51,152 +39,18 @@ "boltons", ] -default_cache_dir = Path(pooch.os_cache("xclim-testdata")) -"""Default location for the testing data cache.""" logger = logging.getLogger("xclim") + __all__ = [ - "audit_url", - "default_cache_dir", "list_input_variables", - "open_dataset", "publish_release_notes", "run_doctests", "show_versions", ] -def audit_url(url: str, context: str | None = None) -> str: - """Check if the URL is well-formed. - - Raises - ------ - URLError - If the URL is not well-formed. - """ - msg = "" - result = urlparse(url) - if result.scheme == "http": - msg = f"{context if context else ''} URL is not using secure HTTP: '{url}'".strip() - if not all([result.scheme, result.netloc]): - msg = f"{context if context else ''} URL is not well-formed: '{url}'".strip() - - if msg: - logger.error(msg) - raise URLError(msg) - return url - - -def _get( - name: Path, - github_url: str, - branch: str, - cache_dir: Path, -) -> Path: - cache_dir = cache_dir.absolute() - local_file = cache_dir / branch / name - - if not github_url.startswith("https"): - raise ValueError(f"GitHub URL not secure: '{github_url}'.") - - if not local_file.is_file(): - # This will always leave this directory on disk. - # We may want to add an option to remove it. - local_file.parent.mkdir(exist_ok=True, parents=True) - url = "/".join((github_url, "raw", branch, "data", name.as_posix())) - msg = f"Fetching remote file: {name.as_posix()}" - logger.info(msg) - try: - urlretrieve(audit_url(url), local_file) # noqa: S310 - except HTTPError as e: - msg = ( - f"{name.as_posix()} not accessible in remote repository: {url}. " - "Aborting file retrieval." - ) - raise FileNotFoundError(msg) from e - except SocketBlockedError as e: - msg = ( - f"Unable to access {name.as_posix()} online. Testing suite is being run with `--disable-socket`. " - f"If you intend to run tests with this option enabled, please download the file beforehand with the " - f"following console command: `xclim prefetch_testing_data`." - ) - raise FileNotFoundError(msg) from e - - return local_file - - -# idea copied from raven that it borrowed from xclim that borrowed it from xarray that was borrowed from Seaborn -def open_dataset( - name: str | os.PathLike[str], - dap_url: str | None = None, - github_url: str = "https://github.com/Ouranosinc/xclim-testdata", - branch: str = "main", - cache: bool = True, - cache_dir: Path = default_cache_dir, - **kwargs, -) -> Dataset: - r"""Open a dataset from the online GitHub-like repository. - - If a local copy is found then always use that to avoid network traffic. 
-    Parameters
-    ----------
-    name : str or os.PathLike
-        Name of the file containing the dataset.
-    dap_url : str, optional
-        URL to OPeNDAP folder where the data is stored. If supplied, supersedes github_url.
-    github_url : str
-        URL to GitHub repository where the data is stored.
-    branch : str, optional
-        For GitHub-hosted files, the branch to download from.
-    cache_dir : Path
-        The directory in which to search for and write cached data.
-    cache : bool
-        If True, then cache data locally for use on subsequent calls.
-    \*\*kwargs
-        For NetCDF files, keywords passed to :py:func:`xarray.open_dataset`.
-
-    Returns
-    -------
-    Union[Dataset, Path]
-
-    See Also
-    --------
-    xarray.open_dataset
-    """
-    if isinstance(name, (str, os.PathLike)):
-        name = Path(name)
-
-    if dap_url is not None:
-        dap_file_address = urljoin(dap_url, str(name))
-        try:
-            ds = _open_dataset(audit_url(dap_file_address, context="OPeNDAP"), **kwargs)
-            return ds
-        except URLError:
-            raise
-        except OSError:
-            msg = f"OPeNDAP file not read. Verify that the service is available: '{dap_file_address}'"
-            logger.error(msg)
-            raise OSError(msg)
-
-    local_file = _get(
-        name=name,
-        github_url=github_url,
-        branch=branch,
-        cache_dir=cache_dir,
-    )
-
-    try:
-        ds = _open_dataset(local_file, **kwargs)
-        if not cache:
-            ds = ds.load()
-            local_file.unlink()
-        return ds
-    except OSError as err:
-        raise err
-
-
 def list_input_variables(
     submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None
 ) -> dict:
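
Reviewer note: a minimal sketch of the testing-data workflow this patch introduces, for trying the changes out locally. It assumes the repository and branch are resolved from the environment (`XCLIM_TESTDATA_REPO_URL`, `XCLIM_TESTDATA_BRANCH`) and that the two file names below are present in the packaged registry, as they are in the tests above:

    from xclim.testing.helpers import nimbus, open_dataset

    # Build the pooch fetcher; files land under the versioned cache directory.
    fetcher = nimbus()

    # Fetch registry entries; each call returns the local path, downloading only when needed.
    csv_path = fetcher.fetch("uncertainty_partitioning/seattle_avg_tas.csv")
    fetcher.fetch("ERA5/daily_surface_cancities_1990-1993.nc")

    # Open the now-cached NetCDF file, mirroring the session-scoped `open_dataset` fixture.
    ds = open_dataset(
        "ERA5/daily_surface_cancities_1990-1993.nc",
        cache_dir=fetcher.path,
        engine="h5netcdf",
    )

This mirrors how `tests/conftest.py` wires the `nimbus` and `open_dataset` fixtures together: pooch owns downloading and hash verification, and the helper's `open_dataset` only ever reads from the local cache (or an explicit `dap_url`).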