From 4fc7c3db895dca18ad7f1240a46f81fe93c455ac Mon Sep 17 00:00:00 2001 From: Jacopo De Amicis Date: Tue, 23 Jan 2024 00:50:11 +0100 Subject: [PATCH] Use static IP to populate slurmdbd's DbdAddr parameter Export the IP into the instance dna.json. Add minor comments to the code. Signed-off-by: Jacopo De Amicis --- .../recipes/external_slurmdbd_config.rb | 2 -- .../recipes/config/config_slurm_accounting.rb | 3 +++ .../slurm/external_slurmdbd/slurm_external_slurmdbd.conf.erb | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cookbooks/aws-parallelcluster-entrypoints/recipes/external_slurmdbd_config.rb b/cookbooks/aws-parallelcluster-entrypoints/recipes/external_slurmdbd_config.rb index bbbc7626b..2d6d67dd7 100644 --- a/cookbooks/aws-parallelcluster-entrypoints/recipes/external_slurmdbd_config.rb +++ b/cookbooks/aws-parallelcluster-entrypoints/recipes/external_slurmdbd_config.rb @@ -49,8 +49,6 @@ include_recipe 'aws-parallelcluster-slurm::config_head_node_directories' -# TODO: configuration of munge systemd service; - # TODO: move this template to a separate recipe # TODO: add a logic in update_munge_key.sh.erb to skip sharing munge key to shared dir template "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh" do diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb index d66fc0ee0..b3537923d 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb @@ -32,6 +32,7 @@ action :create_if_missing variables( dbd_host: "localhost", + dbd_addr: node['slurmdbd_ip'], storage_host: node['dbms_uri'], # TODO: expose additional CFN Parameter in template storage_port: 3306, @@ -74,6 +75,8 @@ # After starting slurmdbd the database may not be fully responsive yet and # its bootstrapping may fail. We need to wait for sacctmgr to successfully # query the database before proceeding. +# In case of an external slurmdbd the Slurm commands do not work, so this +# check cannot be executed. execute "wait for slurm database" do command "#{node['cluster']['slurm']['install_dir']}/bin/sacctmgr show clusters -Pn" retries node['cluster']['slurmdbd_response_retries'] diff --git a/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/external_slurmdbd/slurm_external_slurmdbd.conf.erb b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/external_slurmdbd/slurm_external_slurmdbd.conf.erb index 16a66a920..c9becc419 100644 --- a/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/external_slurmdbd/slurm_external_slurmdbd.conf.erb +++ b/cookbooks/aws-parallelcluster-slurm/templates/default/slurm/external_slurmdbd/slurm_external_slurmdbd.conf.erb @@ -2,6 +2,7 @@ # Do not modify. # Please add user-specific slurmdbd configuration options in slurmdbd.conf DbdHost=<%= @dbd_host %> +DbdAddr=<%= @dbd_addr %> StorageHost=<%= @storage_host %> StoragePort=<%= @storage_port %> StorageLoc=<%= @storage_loc %>