added a new qos resolver (#2)

zaccharieramzi · Feb 22, 2022 · 6a253a7 · 6a253a7
1 parent bf9cc37
commit 6a253a7
Show file tree

Hide file tree

Showing 9 changed files with 39 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -45,6 +45,11 @@ For example, if you want to use the gpu_p2 partition, you would need to do:
 hydra-submitit-launch my_app.py dev hydra.launcher.setup=null hydra.launcher.partition=gpu_p2
 ```
 
+To change the timeout of the SLURM job, for example to 10 hours, run:
+```
+hydra-submitit-launch my_app.py base +hydra.launcher.hours=10
+```
+This will automatically select the right qos for you.
 
 ## References
 - Hydra: https://hydra.cc/docs/intro/

diff --git a/conf/config.yaml b/conf/config.yaml
@@ -0,0 +1 @@
+fake_param: 4
diff --git a/example_app.py b/example_app.py
@@ -0,0 +1,11 @@
+import hydra
+
+from omegaconf import OmegaConf
+
+
+@hydra.main(config_path='conf', config_name='config')
+def example_main(cfg):  # prints cfg after converting it with OmegaConf.to_container
+    print(OmegaConf.to_container(cfg, resolve=True))  # resolve=True: interpolations are resolved first
+
+if __name__ == '__main__':  # call example_main only when run as a script, not on import
+    example_main()
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_dev.yaml b/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_dev.yaml
@@ -1,5 +1,4 @@
 defaults:
   - dev
 
-cpus_per_task: 40
-gpus_per_node: 4
+gpus_per_node: 4
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_t3.yaml b/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_t3.yaml
@@ -1,5 +1,4 @@
 defaults:
   - t3
 
-cpus_per_task: 40
-gpus_per_node: 4
+gpus_per_node: 4
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_t4.yaml b/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/4gpus_t4.yaml
@@ -1,5 +1,4 @@
 defaults:
   - t4
 
-cpus_per_task: 40
-gpus_per_node: 4
+gpus_per_node: 4
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/base.yaml b/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/base.yaml
@@ -5,13 +5,14 @@ timeout_min: 60
 gpus_per_node: 1
 tasks_per_node: 1
 gres: "gpu:${hydra.launcher.gpus_per_node}"
-qos: qos_gpu-dev
+qos: ${qos_from_hours:${hours}}
 cpus_per_gpu: 10
+cpus_per_task: ${cpu_from_gpu:${hydra.launcher.gpus_per_node},${hydra.launcher.cpus_per_gpu}}
 gpus_per_task: ${hydra.launcher.gpus_per_node}
 additional_parameters:
   account: ${oc.env:IDRPROJ}@gpu
   distribution: "block:block"
   hint: nomultithread
   time: "${hours}:00:00"
 setup:
-  - "#SBATCH -C v100-32g"
+  - "#SBATCH -C v100-32g"
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/dev.yaml b/hydra_plugins/jz_hydra_submitit_launcher/hydra/launcher/dev.yaml
@@ -1,5 +1,6 @@
 defaults:
   - base
 
+qos: qos_gpu-dev
 additional_parameters:
   time: "2:00:00"
diff --git a/hydra_plugins/jz_hydra_submitit_launcher/resolvers.py b/hydra_plugins/jz_hydra_submitit_launcher/resolvers.py
@@ -1,3 +1,17 @@
 from omegaconf import OmegaConf
 
-OmegaConf.register_new_resolver("multiply10", lambda x: x * 10)
+
+def time_to_qos(timeout_hour):
+    """Return the 'qos_gpu-*' name for a timeout given in hours.
+
+    The argument goes through int(); bounds are strict (20 -> t3, 2 -> dev).
+    """
+    hours = int(timeout_hour)
+    if hours > 20:
+        return 'qos_gpu-t4'
+    if hours > 2:
+        return 'qos_gpu-t3'
+    return 'qos_gpu-dev'
+
+OmegaConf.register_new_resolver("qos_from_hours", time_to_qos, replace=True)  # used as ${qos_from_hours:...} for `qos` in base.yaml
+OmegaConf.register_new_resolver("cpu_from_gpu", lambda x,y: x*y, replace=True)  # product of its two arguments; used for `cpus_per_task` in base.yaml