From 06fd778174442d26567dc2ddb9e0b12eed161e13 Mon Sep 17 00:00:00 2001 From: Lucas A Estrada <63303345+laestrada@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:50:14 -0400 Subject: [PATCH] add additional config variables to control inversion step resource allocation (#272) Optionally allow user to control the resources allocated to the inversion step separately from the RequestedMemory, RequestedCPUs, and RequestedTime. If InversionMemory, InversionCPUs, and InversionTime are not set, then the default Memory and CPUs will be used. --- config.yml | 6 ++++-- docs/source/getting-started/imi-config-file.rst | 10 +++++++++- .../config.harvard-cannon.global_inv.yml | 2 ++ envs/Harvard-Cannon/config.harvard-cannon.yml | 2 ++ resources/containers/al2/container_config.yml | 6 ++++-- src/components/inversion_component/inversion.sh | 10 +++++++--- 6 files changed, 28 insertions(+), 8 deletions(-) diff --git a/config.yml b/config.yml index 635ed06d..93efb84d 100644 --- a/config.yml +++ b/config.yml @@ -127,8 +127,10 @@ DoPreview: true DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -RequestedCPUs: 32 -RequestedMemory: 32000 +RequestedCPUs: 8 +RequestedMemory: 10000 +InversionCPUs: 32 +InversionMemory: 32000 RequestedTime: "0-24:00" SchedulerPartition: "debug" diff --git a/docs/source/getting-started/imi-config-file.rst b/docs/source/getting-started/imi-config-file.rst index 5a0d412b..233d596a 100644 --- a/docs/source/getting-started/imi-config-file.rst +++ b/docs/source/getting-started/imi-config-file.rst @@ -250,7 +250,9 @@ IMI preview Job Resource Allocation ~~~~~~~~~~~~~~~~~~~~~~~~~ These settings are used to allocate resources (CPUs and Memory) to the different simulations needed to run the inversion. -Note: some python scripts are also deployed using slurm and default to using the ``RequestedCPUs`` and ``RequestedMemory`` settings. 
+Note: some python scripts are also deployed using slurm and default to using the ``RequestedCPUs`` and ``RequestedMemory`` settings. +If the inversion step requires more resources than the rest of the IMI workflow, using the optional ``InversionCPUs``, ``InversionMemory``, and ``InversionTime`` +variables can be convenient. .. list-table:: :widths: 30, 70 @@ -262,6 +264,12 @@ Note: some python scripts are also deployed using slurm and default to using the - Amount of memory to allocate to each in series simulation (in MB). * - ``RequestedTime`` - Max amount of time to allocate to each sbatch job (eg. "0-6:00") + * - ``InversionCPUs`` + - Optional Variable. Number of cores to allocate to the inversion job if different from ``RequestedCPUs``. + * - ``InversionMemory`` + - Optional Variable. Amount of memory to allocate to the inversion job (in MB) if different from ``RequestedMemory``. + * - ``InversionTime`` + - Optional Variable. Max amount of time to allocate to inversion sbatch job (eg. "0-6:00") if different from ``RequestedTime``. * - ``SchedulerPartition`` - Name of the partition(s) you would like all slurm jobs to run on (eg. "debug,huce_cascade,seas_compute,etc").
* - ``MaxSimultaneousRuns`` diff --git a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml index d9717645..deb580d7 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.global_inv.yml @@ -129,6 +129,8 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs RequestedCPUs: 32 RequestedMemory: 32000 +InversionCPUs: 32 +InversionMemory: 64000 RequestedTime: "0-24:00" SchedulerPartition: "sapphire,huce_cascade,seas_compute,shared" diff --git a/envs/Harvard-Cannon/config.harvard-cannon.yml b/envs/Harvard-Cannon/config.harvard-cannon.yml index b0d8fa76..9603517d 100644 --- a/envs/Harvard-Cannon/config.harvard-cannon.yml +++ b/envs/Harvard-Cannon/config.harvard-cannon.yml @@ -129,6 +129,8 @@ DOFSThreshold: 0 ## Resource allocation settings for slurm jobs RequestedCPUs: 32 RequestedMemory: 32000 +InversionCPUs: 32 +InversionMemory: 64000 RequestedTime: "0-24:00" SchedulerPartition: "sapphire,huce_cascade,seas_compute,shared" diff --git a/resources/containers/al2/container_config.yml b/resources/containers/al2/container_config.yml index 990b6545..bf0bc3ee 100644 --- a/resources/containers/al2/container_config.yml +++ b/resources/containers/al2/container_config.yml @@ -127,8 +127,10 @@ DoPreview: true DOFSThreshold: 0 ## Resource allocation settings for slurm jobs -RequestedCPUs: 16 -RequestedMemory: 16000 +RequestedCPUs: 8 +RequestedMemory: 8000 +InversionCPUs: 16 +InversionMemory: 16000 RequestedTime: "0-24:00" SchedulerPartition: "debug" diff --git a/src/components/inversion_component/inversion.sh b/src/components/inversion_component/inversion.sh index e924f49d..65e8f78f 100644 --- a/src/components/inversion_component/inversion.sh +++ b/src/components/inversion_component/inversion.sh @@ -80,10 +80,14 @@ run_inversion() { cd ${RunDirs}/inversion fi + # Set inversion memory, CPUs, and time + 
InvMem="${InversionMemory:-$RequestedMemory}" + InvCPU="${InversionCPUs:-$RequestedCPUs}" + InvTime="${InversionTime:-$RequestedTime}" # Execute inversion driver script - sbatch --mem $RequestedMemory \ - -c $RequestedCPUs \ - -t $RequestedTime \ + sbatch --mem $InvMem \ + -c $InvCPU \ + -t $InvTime \ -p $SchedulerPartition \ -W run_inversion.sh $FirstSimSwitch; wait;