From 026b18c2d4d4621ce99b1f02a2ff86805e73d056 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 23 Mar 2022 15:11:07 +0100 Subject: [PATCH 1/4] Add gpu resource requests/limits to pod args --- dask_kubernetes/objects.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/dask_kubernetes/objects.py b/dask_kubernetes/objects.py index 6e40326c9..063d33c19 100644 --- a/dask_kubernetes/objects.py +++ b/dask_kubernetes/objects.py @@ -116,11 +116,13 @@ def make_pod_spec( env={}, extra_container_config={}, extra_pod_config={}, - memory_limit=None, resources=None, + memory_limit=None, memory_request=None, cpu_limit=None, cpu_request=None, + gpu_limit=None, + gpu_request=None, annotations={}, ): """ @@ -140,21 +142,29 @@ def make_pod_spec( Extra config attributes to set on the container object extra_pod_config : dict Extra config attributes to set on the pod object - memory_limit : int, float, or str - Bytes of memory per process that the worker can use. - This can be: - - an integer (bytes), note 0 is a special case for no memory management. - - a float (fraction of total system memory). - - a string (like 5GB or 5000M). - - 'auto' for automatically computing the memory limit. [default: auto] resources : str Resources for task constraints like "GPU=2 MEM=10e9". Resources are applied separately to each worker process (only relevant when starting multiple worker processes. Passed to the `--resources` option in ``dask-worker``. + memory_limit : int, float, or str + Bytes of memory per process that the worker can use (applied to both + ``dask-worker --memory-limit`` and ``spec.containers[].resources.limits.memory``). + This can be: + - an integer (bytes), note 0 is a special case for no memory management. + - a float (bytes). Note: fraction of total system memory is not supported by k8s. + - a string (like 5GiB or 5000M). Note: 'GB' is not supported by k8s. + - 'auto' for automatically computing the memory limit. [default: auto] + memory_request : int, float, or str + Like ``memory_limit`` (applied only to ``spec.containers[].resources.requests.memory`` + and ignored by ``dask-worker``). cpu_limit : float or str - CPU resource limits (applied to ``spec.containers[].resources.limits.cpu``) - cpu_requests : float or str - CPU resource requests (applied to ``spec.containers[].resources.requests.cpu``) + CPU resource limits (applied to ``spec.containers[].resources.limits.cpu``). + cpu_request : float or str + CPU resource requests (applied to ``spec.containers[].resources.requests.cpu``). + gpu_limit : int + GPU resource limits (applied to ``spec.containers[].resources.limits."nvidia.com/gpu"``). + gpu_request : int + GPU resource requests (applied to ``spec.containers[].resources.requests."nvidia.com/gpu"``). annotations : dict Dict of annotations passed to ``V1ObjectMeta`` @@ -197,11 +207,15 @@ def make_pod_spec( if cpu_request: resources.requests["cpu"] = cpu_request + if gpu_request: + resources.requests["nvidia.com/gpu"] = gpu_request if memory_request: resources.requests["memory"] = memory_request if cpu_limit: resources.limits["cpu"] = cpu_limit + if gpu_limit: + resources.limits["nvidia.com/gpu"] = gpu_limit if memory_limit: resources.limits["memory"] = memory_limit From f9eedd436bec361affcbba4ec6fc708ee20b680b Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 23 Mar 2022 17:22:06 +0100 Subject: [PATCH 2/4] Apply suggestions from code review --- dask_kubernetes/objects.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dask_kubernetes/objects.py b/dask_kubernetes/objects.py index 063d33c19..73993c0a6 100644 --- a/dask_kubernetes/objects.py +++ b/dask_kubernetes/objects.py @@ -163,8 +163,6 @@ def make_pod_spec( CPU resource requests (applied to ``spec.containers[].resources.requests.cpu``). gpu_limit : int GPU resource limits (applied to ``spec.containers[].resources.limits."nvidia.com/gpu"``). - gpu_request : int - GPU resource requests (applied to ``spec.containers[].resources.requests."nvidia.com/gpu"``). annotations : dict Dict of annotations passed to ``V1ObjectMeta`` @@ -207,8 +205,6 @@ def make_pod_spec( if cpu_request: resources.requests["cpu"] = cpu_request - if gpu_request: - resources.requests["nvidia.com/gpu"] = gpu_request if memory_request: resources.requests["memory"] = memory_request From db37db97acced0c156464653d849a3ded8bdfab5 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 24 Mar 2022 07:13:10 +0100 Subject: [PATCH 3/4] Replace V1beta1Eviction with V1Eviction in tests --- dask_kubernetes/tests/test_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_kubernetes/tests/test_async.py b/dask_kubernetes/tests/test_async.py index 117942b45..b9cd2bdd7 100644 --- a/dask_kubernetes/tests/test_async.py +++ b/dask_kubernetes/tests/test_async.py @@ -405,7 +405,7 @@ async def test_reject_evicted_workers(cluster): await cluster.core_api.create_namespaced_pod_eviction( (await worker.describe_pod()).metadata.name, (await worker.describe_pod()).metadata.namespace, - kubernetes.client.V1beta1Eviction( + kubernetes.client.V1Eviction( delete_options=kubernetes.client.V1DeleteOptions(grace_period_seconds=300), metadata=(await worker.describe_pod()).metadata, ), From e4485a4c21e2b68cf133eef9a68f95118908c20f Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 24 Mar 2022 07:50:56 +0100 Subject: [PATCH 4/4] Remove gpu_request leftover --- dask_kubernetes/objects.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dask_kubernetes/objects.py b/dask_kubernetes/objects.py index 73993c0a6..3c0a8d9f8 100644 --- a/dask_kubernetes/objects.py +++ b/dask_kubernetes/objects.py @@ -122,7 +122,6 @@ def make_pod_spec( cpu_limit=None, cpu_request=None, gpu_limit=None, - gpu_request=None, annotations={}, ): """