More dask features #959

Merged (15 commits) on Nov 16, 2023
.github/workflows/ci.yml (8 additions, 2 deletions)

```diff
@@ -37,7 +37,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        executors: [multiprocessing, slurm, kubernetes]
+        executors: [multiprocessing, slurm, kubernetes, dask]
         python-version: ["3.11", "3.10", "3.9", "3.8"]
     defaults:
       run:
@@ -88,7 +88,7 @@ jobs:
           ./kind load docker-image scalableminds/cluster-tools:latest

       - name: Install dependencies (without docker)
-        if: ${{ matrix.executors == 'multiprocessing' || matrix.executors == 'kubernetes' }}
+        if: ${{ matrix.executors != 'slurm' }}
        run: |
          pip install -r ../requirements.txt
          poetry install
@@ -130,6 +130,12 @@ jobs:
          cd tests
          PYTEST_EXECUTORS=kubernetes poetry run python -m pytest -sv test_all.py test_kubernetes.py

+      - name: Run dask tests
+        if: ${{ matrix.executors == 'dask' && matrix.python-version != '3.8' }}
+        run: |
+          cd tests
+          PYTEST_EXECUTORS=dask poetry run python -m pytest -sv test_all.py
+
   webknossos_linux:
     needs: changes
     if: |
```
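The new CI step selects which executor backends to exercise through the `PYTEST_EXECUTORS` environment variable (see the `Run dask tests` step above). Purely as an illustration, here is a minimal sketch of how a test suite might turn that variable into a list of executor keys; the helper name and the default value are assumptions and not code from this repository:

```python
import os
from typing import List


def get_executor_keys_from_env() -> List[str]:
    # PYTEST_EXECUTORS holds a comma-separated list such as "multiprocessing,dask".
    # The default used when the variable is unset is an assumption for this sketch.
    raw = os.environ.get("PYTEST_EXECUTORS", "multiprocessing,sequential")
    return [key.strip() for key in raw.split(",") if key.strip()]


if __name__ == "__main__":
    # With PYTEST_EXECUTORS=dask this prints ['dask'].
    print(get_executor_keys_from_env())
```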
cluster_tools/Changelog.md (0 additions, 1 deletion)

```diff
@@ -17,7 +17,6 @@ For upgrade instructions, please check the respective *Breaking Changes* section
 - The cluster address for the `DaskExecutor` can be configured via the `DASK_ADDRESS` env var. [#959](https://github.com/scalableminds/webknossos-libs/pull/959)

 ### Changed
-- Upgrades mypy to 1.6. [#956](https://github.com/scalableminds/webknossos-libs/pull/956)
 - Tasks using the `DaskExecutor` are run in their own process. This is required to not block the GIL for the dask worker to communicate with the scheduler. Env variables are propagated to the task processes. [#959](https://github.com/scalableminds/webknossos-libs/pull/959)

 ### Fixed
```
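The `DASK_ADDRESS` entry above describes configuring the cluster address through the environment. A hedged usage sketch, assuming an already running dask scheduler; the address shown is a placeholder, and the exact precedence between the env var and explicitly passed job resources is not specified here:

```python
import os

import cluster_tools

# Point the DaskExecutor at an existing dask scheduler.
# "tcp://127.0.0.1:8786" is only a placeholder address for illustration.
os.environ["DASK_ADDRESS"] = "tcp://127.0.0.1:8786"

with cluster_tools.get_executor("dask") as executor:
    # Standard concurrent.futures semantics: submit returns a future.
    future = executor.submit(sum, [1, 2, 3])
    print(future.result())  # 6
```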
cluster_tools/cluster_tools/executors/dask.py (24 additions, 4 deletions)

```diff
@@ -19,6 +19,7 @@
     TypeVar,
     cast,
 )
+from weakref import ReferenceType, ref

 from typing_extensions import ParamSpec

@@ -66,12 +67,25 @@ def _run_with_nanny(

 def _parse_mem(size: str) -> int:
     units = {"": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}
-    m = re.match(r"^([\d\.]+)\s*([a-zA-Z]{0,3})$", str(size).strip())
-    assert m is not None
+    m = re.match(r"^([\d\.]+)\s*([kmgtKMGT]{0,1})$", str(size).strip())
+    assert m is not None, f"Could not parse {size}"
     number, unit = float(m.group(1)), m.group(2).upper()
     assert unit in units
     return int(number * units[unit])


+def _handle_kill_through_weakref(
+    executor_ref: "ReferenceType[DaskExecutor]",
+    existing_sigint_handler: Any,
+    signum: Optional[int],
+    frame: Any,
+) -> None:
+    executor = executor_ref()
+    if executor is None:
+        return
+    executor.handle_kill(existing_sigint_handler, signum, frame)
+
+
 class DaskExecutor(futures.Executor):
     """
     The `DaskExecutor` allows to run workloads on a dask cluster.
@@ -113,7 +127,10 @@ def __init__(
         # shutdown of the main process which sends SIGTERM signals to terminate all
         # child processes.
         existing_sigint_handler = signal.getsignal(signal.SIGINT)
-        signal.signal(signal.SIGINT, partial(self.handle_kill, existing_sigint_handler))
+        signal.signal(
+            signal.SIGINT,
+            partial(_handle_kill_through_weakref, ref(self), existing_sigint_handler),
+        )

     @classmethod
     def from_config(
@@ -224,7 +241,10 @@ def forward_log(self, fut: "Future[_T]") -> _T:
         return fut.result()

     def handle_kill(
-        self, existing_sigint_handler: Any, signum: Optional[int], frame: Any
+        self,
+        existing_sigint_handler: Any,
+        signum: Optional[int],
+        frame: Any,
    ) -> None:
        if self.is_shutting_down:
            return
```
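The tightened `_parse_mem` regex now accepts at most one unit letter (k/m/g/t, case-insensitive) and reports the offending value when parsing fails. A few illustrative assertions that follow directly from the code above; the actual call sites are outside this diff:

```python
from cluster_tools.executors.dask import _parse_mem  # private helper, imported only for illustration

assert _parse_mem("512") == 512                # no unit suffix: plain bytes
assert _parse_mem("100K") == 100 * 2**10
assert _parse_mem("1.5G") == int(1.5 * 2**30)
assert _parse_mem("2 t") == 2 * 2**40          # whitespace and lower-case units are accepted
```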
cluster_tools/cluster_tools/schedulers/cluster_executor.py (17 additions, 1 deletion)

```diff
@@ -23,6 +23,7 @@
     Union,
     cast,
 )
+from weakref import ReferenceType, ref

 from typing_extensions import ParamSpec

@@ -45,6 +46,18 @@
 _S = TypeVar("_S")


+def _handle_kill_through_weakref(
+    executor_ref: "ReferenceType[ClusterExecutor]",
+    existing_sigint_handler: Any,
+    signum: Optional[int],
+    frame: Any,
+) -> None:
+    executor = executor_ref()
+    if executor is None:
+        return
+    executor.handle_kill(existing_sigint_handler, signum, frame)
+
+
 def join_messages(strings: List[str]) -> str:
     return " ".join(x.strip() for x in strings if x.strip())

@@ -130,7 +143,10 @@ def __init__(
         # shutdown of the main process which sends SIGTERM signals to terminate all
         # child processes.
         existing_sigint_handler = signal.getsignal(signal.SIGINT)
-        signal.signal(signal.SIGINT, partial(self.handle_kill, existing_sigint_handler))
+        signal.signal(
+            signal.SIGINT,
+            partial(_handle_kill_through_weakref, ref(self), existing_sigint_handler),
+        )

        self.meta_data = {}
        assert not (
```
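Both executors now register the SIGINT handler through a module-level function that holds only a weak reference to the executor, presumably so that the handler installed via `signal.signal` does not keep the executor object alive after user code has dropped it. A self-contained sketch of that pattern, with the `Worker` class and its `handle_kill` method invented purely for illustration:

```python
import signal
from functools import partial
from weakref import ReferenceType, ref


class Worker:
    def handle_kill(self, signum, frame):
        print("cleaning up before exit")


def _handle_kill_through_weakref(
    worker_ref: "ReferenceType[Worker]", signum, frame
) -> None:
    worker = worker_ref()  # resolves to None once the Worker has been garbage-collected
    if worker is None:
        return
    worker.handle_kill(signum, frame)


worker = Worker()
# signal.signal keeps the handler callable alive for the rest of the process.
# Binding only ref(worker) means the handler does not additionally pin the
# Worker instance in memory, so it can still be garbage-collected.
signal.signal(signal.SIGINT, partial(_handle_kill_through_weakref, ref(worker)))
```

In the PR itself, the helper additionally forwards the previously installed SIGINT handler so that `handle_kill` can chain to it.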
cluster_tools/tests/test_all.py (9 additions, 5 deletions)

```diff
@@ -14,7 +14,6 @@
 from distributed import LocalCluster

 import cluster_tools
-from cluster_tools.executors.dask import DaskExecutor


 # "Worker" functions.
@@ -79,10 +78,14 @@ def get_executors(with_debug_sequential: bool = False) -> List[cluster_tools.Exe
         executors.append(cluster_tools.get_executor("sequential"))
     if "dask" in executor_keys:
         if not _dask_cluster:
-            from distributed import LocalCluster
+            from distributed import LocalCluster, Worker

-            _dask_cluster = LocalCluster()
-            executors.append(cluster_tools.get_executor("dask", address=_dask_cluster))
+            _dask_cluster = LocalCluster(
+                worker_class=Worker, resources={"mem": 20e9, "cpus": 4}, nthreads=6
+            )
+            executors.append(
+                cluster_tools.get_executor("dask", job_resources={"address": _dask_cluster})
+            )
     if "test_pickling" in executor_keys:
         executors.append(cluster_tools.get_executor("test_pickling"))
     if "pbs" in executor_keys:
@@ -328,7 +331,8 @@ def run_map(executor: cluster_tools.Executor) -> None:
     assert list(result) == [4, 9, 16]

     for exc in get_executors():
-        run_map(exc)
+        if not isinstance(exc, cluster_tools.DaskExecutor):
```
A review thread is attached to the `if not isinstance(exc, cluster_tools.DaskExecutor):` line:

Member: Why is this specific test excluded for the DaskExecutor?

Member (author): Futures of the DaskExecutor become invalid when the executor is closed, which makes this test invalid. I was thinking about removing this test or making it fail for all executors (probably a bit of effort). cc @philippotto

Member: My thoughts:

- Yes, we should strive for similar behavior between dask and slurm. Either (A) the futures should become invalid in the slurm context too, or (B) (maybe I'd prefer this?) we wrap/copy the results into different future objects that survive the context termination. Or is there a benefit in letting the futures die? The copying could be done upon context exit.
- If we do (A), this would be a breaking change (and likely needs fixing in vx etc.), so I'd tackle it in a separate PR.
- Either way, the test itself should not be removed without replacement. What the test intends to assert is that the iterator returned by `map` contains futures that were kicked off before the iterator is consumed (read the comment there). Essentially this covers an implementation detail, but the overall expected behavior is that the `map` call eagerly submits all futures while lazily awaiting their results (so that they don't all need to be in RAM at once). The test exploits the use-futures-after-context-was-shut-down behavior to verify the eager submit: if submission were not eager, the test would fail because the submit would happen after context exit. If that behavior is removed, the eager submits should still be checked for, in my opinion.

I hope this is somewhat comprehensible. If not, let's have a call 🤙

Member (author): I believe dask transfers the data lazily from the workers or scheduler. That doesn't work anymore once the client closes. We could wrap the futures to eagerly collect the data.

Member: > We could wrap the futures to eagerly collect the data.

Yes, either this, or try to hook into the closing client (so that collection is done at the last moment where it is still possible).

Member (author): I would leave that for a followup and merge this as is. OK?

Member: sure 👍
The diff continues:

```diff
+            run_map(exc)


 def test_executor_args() -> None:
```
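The thread above floats the idea of wrapping dask futures so their results are collected eagerly and remain usable after the client shuts down, and explicitly leaves it for a follow-up. Purely as an illustration of that idea, a hedged sketch; the function name is invented, and real `distributed.Future` objects may need additional handling:

```python
from concurrent.futures import Future


def materialize_future(remote_future) -> Future:
    """Copy the result (or exception) of a remote future into a local
    concurrent.futures.Future so it stays usable after the client closes."""
    local: Future = Future()
    try:
        local.set_result(remote_future.result())
    except Exception as exc:
        local.set_exception(exc)
    return local


# Usage sketch: wrap futures before the executor/client is shut down, e.g.
# eager_futures = [materialize_future(f) for f in remote_futures]
```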