From 06fd778174442d26567dc2ddb9e0b12eed161e13 Mon Sep 17 00:00:00 2001
From: Lucas A Estrada <63303345+laestrada@users.noreply.github.com>
Date: Mon, 2 Sep 2024 15:50:14 -0400
Subject: [PATCH] add additional config variables to control inversion step
 resource allocation (#272)

Optionally allow the user to control the resources allocated to the inversion step separately from RequestedMemory, RequestedCPUs, and RequestedTime. If InversionMemory, InversionCPUs, or InversionTime is not set, the corresponding RequestedMemory, RequestedCPUs, or RequestedTime value is used instead.
---
 config.yml                                             |  6 ++++--
 docs/source/getting-started/imi-config-file.rst        | 10 +++++++++-
 .../config.harvard-cannon.global_inv.yml               |  2 ++
 envs/Harvard-Cannon/config.harvard-cannon.yml          |  2 ++
 resources/containers/al2/container_config.yml          |  6 ++++--
 src/components/inversion_component/inversion.sh        | 10 +++++++---
 6 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/config.yml b/config.yml
index 635ed06d..93efb84d 100644
--- a/config.yml
+++ b/config.yml
@@ -127,8 +127,10 @@ DoPreview: true
 DOFSThreshold: 0
 
 ## Resource allocation settings for slurm jobs
-RequestedCPUs: 32
-RequestedMemory: 32000
+RequestedCPUs: 8
+RequestedMemory: 10000
+InversionCPUs: 32
+InversionMemory: 32000
 RequestedTime: "0-24:00"
 SchedulerPartition: "debug"
 
diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst
index 5a0d412b..233d596a 100644
--- a/docs/source/getting-started/imi-config-file.rst
+++ b/docs/source/getting-started/imi-config-file.rst
@@ -250,7 +250,9 @@ IMI preview
 Job Resource Allocation
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 These settings are used to allocate resources (CPUs and Memory) to the different simulations needed to run the inversion.
-Note: some python scripts are also deployed using slurm and default to using the ``RequestedCPUs`` and ``RequestedMemory`` settings.
+Note: some python scripts are also deployed using slurm and default to using the ``RequestedCPUs`` and ``RequestedMemory`` settings.
+If the inversion step requires more resources than the rest of the IMI workflow, the optional ``InversionCPUs``, ``InversionMemory``,
+and ``InversionTime`` variables can be used to allocate them separately.
 
 .. list-table::
    :widths: 30, 70
@@ -262,6 +264,12 @@ Note: some python scripts are also deployed using slurm and default to using the
      - Amount of memory to allocate to each in series simulation (in MB).
    * - ``RequestedTime``
      - Max amount of time to allocate to each sbatch job (eg. "0-6:00")
+   * - ``InversionCPUs``
+     - Optional Variable. Number of cores to allocate to the inversion job if different from ``RequestedCPUs``.
+   * - ``InversionMemory``
+     - Optional Variable. Amount of memory to allocate to the inversion job (in MB) if different from ``RequestedMemory``.
+   * - ``InversionTime``
+     - Optional Variable. Max amount of time to allocate to the inversion sbatch job (eg. "0-6:00") if different from ``RequestedTime``.
    * - ``SchedulerPartition``
      - Name of the partition(s) you would like all slurm jobs to run on (eg. "debug,huce_cascade,seas_compute,etc").
    * - ``MaxSimultaneousRuns``
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
index d9717645..deb580d7 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml
@@ -129,6 +129,8 @@ DOFSThreshold: 0
 ## Resource allocation settings for slurm jobs
 RequestedCPUs: 32
 RequestedMemory: 32000
+InversionCPUs: 32
+InversionMemory: 64000
 RequestedTime: "0-24:00"
 SchedulerPartition: "sapphire,huce_cascade,seas_compute,shared"
 
diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml
index b0d8fa76..9603517d 100644
--- a/envs/Harvard-Cannon/config.harvard-cannon.yml
+++ b/envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -129,6 +129,8 @@ DOFSThreshold: 0
 ## Resource allocation settings for slurm jobs
 RequestedCPUs: 32
 RequestedMemory: 32000
+InversionCPUs: 32
+InversionMemory: 64000
 RequestedTime: "0-24:00"
 SchedulerPartition: "sapphire,huce_cascade,seas_compute,shared"
 
diff --git a/resources/containers/al2/container_config.yml b/resources/containers/al2/container_config.yml
index 990b6545..bf0bc3ee 100644
--- a/resources/containers/al2/container_config.yml
+++ b/resources/containers/al2/container_config.yml
@@ -127,8 +127,10 @@ DoPreview: true
 DOFSThreshold: 0
 
 ## Resource allocation settings for slurm jobs
-RequestedCPUs: 16
-RequestedMemory: 16000
+RequestedCPUs: 8
+RequestedMemory: 8000
+InversionCPUs: 16
+InversionMemory: 16000
 RequestedTime: "0-24:00"
 SchedulerPartition: "debug"
 
diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh
index e924f49d..65e8f78f 100644
--- a/src/components/inversion_component/inversion.sh
+++ b/src/components/inversion_component/inversion.sh
@@ -80,10 +80,14 @@ run_inversion() {
         cd ${RunDirs}/inversion
     fi
 
+    # Set inversion memory, CPUs, and time
+    InvMem="${InversionMemory:-$RequestedMemory}"
+    InvCPU="${InversionCPUs:-$RequestedCPUs}"
+    InvTime="${InversionTime:-$RequestedTime}"
     # Execute inversion driver script
-    sbatch --mem $RequestedMemory \
-           -c $RequestedCPUs \
-           -t $RequestedTime \
+    sbatch --mem $InvMem \
+           -c $InvCPU \
+           -t $InvTime \
            -p $SchedulerPartition \
            -W run_inversion.sh $FirstSimSwitch; wait;