From 7aabb08192408846edf5c7794f6ae50381877bc6 Mon Sep 17 00:00:00 2001 From: Tim McMullan Date: Thu, 4 May 2023 08:49:51 -0600 Subject: [PATCH 01/81] Start NEWS for v22.05.10. --- NEWS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS b/NEWS index 91f513ae48f..e7b28dd24fb 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,9 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 22.05.10 +=========================== + * Changes in Slurm 22.05.9 ========================== -- Allocate correct number of sockets when requesting gres and running with From 6dce43eebba7df74997412f2f11bf1c5d474f202 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Mon, 8 May 2023 14:26:47 -0600 Subject: [PATCH 02/81] Testsuite - add test_122_2.py Test job array with gres Bug 14327 --- testsuite/python/tests/test_122_2.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 testsuite/python/tests/test_122_2.py diff --git a/testsuite/python/tests/test_122_2.py b/testsuite/python/tests/test_122_2.py new file mode 100644 index 00000000000..e680ba3d8be --- /dev/null +++ b/testsuite/python/tests/test_122_2.py @@ -0,0 +1,34 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import atf +import pytest +import re + + +# Setup +@pytest.fixture(scope="module", autouse=True) +def setup(): + atf.require_auto_config('wants to add custom generic resources') + atf.require_config_parameter('GresTypes', 'r1') + atf.require_nodes(1, [('Gres', 'r1:1')]) + atf.require_slurm_running() + + +def test_job_array_with_gres(): + """Test creating job array with gres requested""" + + output_pattern = f"{atf.module_tmp_path}/%A-%a.out" + job_id = atf.submit_job( + f"--array=1-2 --gres=r1:1 --wrap='echo DONE' \ + --output={output_pattern}") + output_file_1 = f"{atf.module_tmp_path}/{job_id}-1.out" + output_file_2 = f"{atf.module_tmp_path}/{job_id}-2.out" + atf.wait_for_job_state(job_id, 'DONE', timeout=5, fatal=True) + with open(output_file_1, 'r') as f: + output = f.read() + assert 'DONE' in output, 'Expect job to finish' + with open(output_file_2, 'r') as f: + output = f.read() + assert 'DONE' in output, 'Expect job to finish' + assert atf.is_slurmctld_running() From 35123dbb69c03377e7e2702b99dad4f70344135f Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Tue, 9 May 2023 10:18:28 -0600 Subject: [PATCH 03/81] Testsuite - Add test_122_2.py to README Bug 14327 --- testsuite/README | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/README b/testsuite/README index 350a5e3393d..0313622b2ab 100644 --- a/testsuite/README +++ b/testsuite/README @@ -954,6 +954,7 @@ test_121_2 /features/gres/test_mps--gres=mps_by_job.py test_122_# Testing of job_arrays. =================================== test_122_1 /features/job_arrays/test_basics.py +test_122_2 Test job array with gres test_123_# Testing of reservations. 
===================================== From 638874e8a79db2ccc751732c00432f9b5664ca24 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Mon, 8 May 2023 14:56:06 -0600 Subject: [PATCH 04/81] Testsuite - add test_144_4.py Test gres with file or type specified Bug 12633 --- testsuite/python/tests/test_144_4.py | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 testsuite/python/tests/test_144_4.py diff --git a/testsuite/python/tests/test_144_4.py b/testsuite/python/tests/test_144_4.py new file mode 100644 index 00000000000..7176b54f7c3 --- /dev/null +++ b/testsuite/python/tests/test_144_4.py @@ -0,0 +1,63 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import atf +import pytest +import re + + +# Setup +@pytest.fixture(scope="module", autouse=True) +def setup(): + atf.require_auto_config('wants to create custom gres and resource file') + atf.require_config_parameter('SelectType', 'select/cons_tres') + atf.require_config_parameter('AccountingStorageType', 'accounting_storage/slurmdbd') + atf.require_config_parameter('GresTypes', 'r1,r2') + atf.require_nodes(1, [('Gres', 'r1:1,r2:a:1'), ('CPUs', '2')]) + resource_file = f"{atf.module_tmp_path}/resource1" + atf.run_command(f"touch {resource_file}") + atf.require_config_parameter( + 'Name', + {'r1' : {'File' : resource_file}, 'r2' : {'Type' : 'a'}}, + source='gres') + atf.require_slurm_running() + + +def test_gres_alloc_dealloc_file(): + """Test alloc/dealloc of gres when file is set, but not type""" + + alloc = atf.run_command_output( + 'salloc --gres=r1 scontrol show nodes node2 -d | grep GresUsed', + fatal=True) + assert 'r1:1' in alloc, 'Expect allocation of gres with file set' + dealloc = atf.run_command_output( + 'scontrol show nodes node2 -d | grep GresUsed') + assert 'r1:0' in dealloc, 'Expect deallocation of gres with file set' + + +def test_gres_alloc_dealloc_type(): + """Test alloc/dealloc of gres when type is set, but not file""" + + alloc = atf.run_command_output( + 'salloc --gres=r2:a:1 scontrol show nodes node2 -d | grep GresUsed', + fatal=True) + assert 'r2:a:1' in alloc, 'Expect allocation of gres with type set' + dealloc = atf.run_command_output( + 'scontrol show nodes node2 -d | grep GresUsed') + assert 'r2:a:0' in dealloc, 'Expect deallocation of gres with type set' + + +def test_gres_overlap(): + """Test gres without file and --overlap""" + + output_file = f"{atf.module_tmp_path}/out" + job_id = atf.submit_job(f"-wnode2 -N1 --gres=r2:1 \ + --output={output_file} --wrap='\ + srun --overlap --gres=r2:1 hostname &\ + srun --overlap --gres=r2:1 hostname &\ + wait'", fatal=True) + atf.wait_for_job_state(job_id, 'DONE') + step_0 = atf.run_command_output(f"sacct -j {job_id}.0") + assert 'COMPLETED' in step_0, 'Expect first step to finish' + step_1 = atf.run_command_output(f"sacct -j {job_id}.1") + assert 'COMPLETED' in step_1, 'Expect second step to finish' From d1e7b13104c50a0bf5799a45faa0fb8237d7432e Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 10 May 2023 11:29:48 -0600 Subject: [PATCH 05/81] Testsuite - Add test_144_4.py to README This only adds 144_4 in the 144 family as the others are in 23.02+ Bug 12633 --- testsuite/README | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testsuite/README b/testsuite/README index 0313622b2ab..0bb1f2c7ea6 100644 --- a/testsuite/README +++ b/testsuite/README @@ -1022,3 +1022,7 @@ test_138_1 
/stress/test_stdin_broadcast.py test_143_# Testing --switches. ================================ test_143_1 Test switches with topology/tree + select/cons_tres + +test_144_# Testing of gres. +================================ +test_144_4 Test gres with file or type and --overlap From c7db6ffbbd4fba69ec02d94a201a8995e9d3e156 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 12 May 2023 15:25:13 +0200 Subject: [PATCH 06/81] Testsuite - Improve test3.13 not assuming that uid 1 is available Bug 16629 --- testsuite/expect/test3.13 | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/testsuite/expect/test3.13 b/testsuite/expect/test3.13 index bcce2012157..c748010bb6b 100755 --- a/testsuite/expect/test3.13 +++ b/testsuite/expect/test3.13 @@ -42,6 +42,24 @@ proc cleanup {} { cancel_job $job_id } +proc get_wrong_uid {} { + global bin_id re_word_str number + + set wrong_uid 1 + set slurm_uid 1 + set my_uid [get_my_uid] + regexp "(${re_word_str})\\(($number)\\)" [get_config_param "SlurmUser"] - slurm_user slurm_uid + set max_uid [expr min($my_uid, $slurm_uid)] + while {$wrong_uid < $max_uid} { + if {![run_command_status "$bin_id $wrong_uid"]} { + return $wrong_uid + } + incr wrong_uid + } + + fail "Unable to find a usable wrong UID" +} + # # Build input script file # @@ -107,7 +125,8 @@ expect { # log_info "Test 1" set no_jobs 0 -spawn $scontrol update JobName=$script_name UserID=1 Priority=$new_prio +set wrong_uid [get_wrong_uid] +spawn $scontrol update JobName=$script_name UserID=$wrong_uid Priority=$new_prio expect { -re "No jobs with" { set no_jobs 1 From c82a97bca4158088f8984939c537d4751c86ddba Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 12 May 2023 15:36:34 +0200 Subject: [PATCH 07/81] Testsuite - Improve test3.13 using submit_job and run_command Bug 16629 --- testsuite/expect/test3.13 | 100 +++++--------------------------------- 1 file changed, 12 insertions(+), 88 deletions(-) diff --git a/testsuite/expect/test3.13 b/testsuite/expect/test3.13 index c748010bb6b..cab48016f95 100755 --- a/testsuite/expect/test3.13 +++ b/testsuite/expect/test3.13 @@ -35,6 +35,7 @@ set file_in "$test_dir/$script_name" set job_id 0 set new_prio 1 set read_priority -1 +set my_uid [get_my_uid] proc cleanup {} { global job_id @@ -43,11 +44,10 @@ proc cleanup {} { } proc get_wrong_uid {} { - global bin_id re_word_str number + global bin_id re_word_str number my_uid set wrong_uid 1 set slurm_uid 1 - set my_uid [get_my_uid] regexp "(${re_word_str})\\(($number)\\)" [get_config_param "SlurmUser"] - slurm_user slurm_uid set max_uid [expr min($my_uid, $slurm_uid)] while {$wrong_uid < $max_uid} { @@ -68,114 +68,38 @@ make_bash_script $file_in "$srun $bin_sleep 60" # # Submit a job so we have something to work with # -spawn $sbatch --output=/dev/null --error=/dev/null -t1 --hold $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "srun not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "srun failed to initiate job" -} +set job_id [submit_job -fail "--output=/dev/null --error=/dev/null -t1 --hold $file_in"] # # Record that job's state # -spawn $scontrol show job $job_id -expect { - -re "Priority=($number)" { - set read_priority $expect_out(1,string) - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -subtest {$read_priority == 0} "Verify job's original hold priority" +subtest {[get_job_param $job_id "Priority"] == 0} "Verify job's original hold
priority" # # Change that job's priority # -spawn $scontrol release $job_id -expect { - -re "slurm_update error: Access.*denied" { - cancel_job $job_id - skip "User not authorized to modify jobs" - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } +set output [run_command_output "$scontrol release $job_id"] +if {[regexp "slurm_update error: Access.*denied" $output]} { + skip "User not authorized to modify jobs" } # # Test modification of job with bad JobName/UserID specification # log_info "Test 1" -set no_jobs 0 set wrong_uid [get_wrong_uid] -spawn $scontrol update JobName=$script_name UserID=$wrong_uid Priority=$new_prio -expect { - -re "No jobs with" { - set no_jobs 1 - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -subtest {$no_jobs != 0} "Verify bad JobName/UserID error" +set output [run_command_output "$scontrol update JobName=$script_name UserID=$wrong_uid Priority=$new_prio"] +subtest {[regexp "No jobs with" $output]} "Verify bad JobName/UserID error" # # Change that job's priority # log_info "Test 2" set no_jobs 0 -set my_uid [get_my_uid] -spawn $scontrol update JobName=$script_name UserID=$my_uid Priority=$new_prio -expect { - -re "No jobs with" { - set no_jobs 1 - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -subtest {$no_jobs == 0} "Verify absence of bad JobName/UserID error" +set output [run_command_output "$scontrol update JobName=$script_name UserID=$my_uid Priority=$new_prio"] +subtest {![regexp "No jobs with" $output]} "Verify absence of bad JobName/UserID error" # # Validate that job's new priority # -spawn $scontrol show job $job_id -expect { - -re "Priority=($number)" { - set read_priority $expect_out(1,string) - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -subtest {$read_priority != 0} "Validate the job's new priority" +subtest {[get_job_param $job_id "Priority"] != 0} "Validate the job's new priority" From a88697e09b02b1037c6ceccac984008bf4879d02 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 12 May 2023 16:01:36 +0200 Subject: [PATCH 08/81] Testsuite - Improve test3.13 using run_command flags Bug 16629 --- testsuite/expect/test3.13 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/expect/test3.13 b/testsuite/expect/test3.13 index cab48016f95..1f22765f588 100755 --- a/testsuite/expect/test3.13 +++ b/testsuite/expect/test3.13 @@ -51,7 +51,7 @@ proc get_wrong_uid {} { regexp "(${re_word_str})\\(($number)\\)" [get_config_param "SlurmUser"] - slurm_user slurm_uid set max_uid [expr min($my_uid, $slurm_uid)] while {$wrong_uid < $max_uid} { - if {![run_command_status "$bin_id $wrong_uid"]} { + if {![run_command_status -nolog -none "$bin_id $wrong_uid"]} { return $wrong_uid } incr wrong_uid @@ -88,7 +88,7 @@ if {[regexp "slurm_update error: Access.*denied" $output]} { # log_info "Test 1" set wrong_uid [get_wrong_uid] -set output [run_command_output "$scontrol update JobName=$script_name UserID=$wrong_uid Priority=$new_prio"] +set output [run_command_output -xfail "$scontrol update JobName=$script_name UserID=$wrong_uid Priority=$new_prio"] subtest {[regexp "No jobs with" $output]} "Verify bad JobName/UserID error" # @@ -96,7 +96,7 @@ subtest {[regexp "No jobs with" $output]} "Verify bad JobName/UserID error" # log_info "Test 2" set no_jobs 0 -set output [run_command_output "$scontrol update JobName=$script_name UserID=$my_uid Priority=$new_prio"] +set output [run_command_output -none 
"$scontrol update JobName=$script_name UserID=$my_uid Priority=$new_prio"] subtest {![regexp "No jobs with" $output]} "Verify absence of bad JobName/UserID error" # From 0da7d694ca5bf73bc88eb61c496ab6b2528725ce Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Mon, 15 May 2023 10:44:22 -0600 Subject: [PATCH 09/81] Testsuite - Update readme gres to mps test_121_# is mislabeled and should be mps not gres --- testsuite/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/README b/testsuite/README index 0bb1f2c7ea6..e9c6a5093f0 100644 --- a/testsuite/README +++ b/testsuite/README @@ -946,7 +946,7 @@ test_119_# Testing of contribs. test_119_1 /contribs/openlava/test_lsid.py test_119_2 /contribs/torque/qsub/test_basic.py -test_121_# Testing of gres. +test_121_# Testing of mps. ============================= test_121_1 /features/gres/test_mps_options.py test_121_2 /features/gres/test_mps--gres=mps_by_job.py From dd3451e526278bd746c58284d757369c7fc9f4e8 Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Tue, 18 Apr 2023 11:37:37 -0600 Subject: [PATCH 10/81] Testsuite - Add lineage test instead of lft/rgt for globals. Bug 16344 --- testsuite/expect/globals | 65 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 90ff58aa6de..cf027b96f39 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -3662,13 +3662,75 @@ proc make_bash_script { script_name script_contents } { proc check_acct_associations { } { global sacctmgr number re_word_str + # 2 versions after 23.11 we can remove the below function + # until then we should check older clusters lft/rgt + set rc [check_acct_associations_lft] + + if { !$rc } { + return $rc; + } + + log_user 0 + log_debug "Sanity-Checking Associations" + # + # Use sacctmgr to check associations + # + spawn $sacctmgr -n -p list assoc wopi wopl withd format=lineage,cluster + expect { + -re "($re_word_str)\\|($re_word_str)\\|" { + # Here we are checking if we have duplicates and + # setting up an array to check for holes later + + set lineage $expect_out(1,string) + set cluster $expect_out(2,string) + set first [info exists found($cluster,$lineage)] + + if { $first } { + log_error "$cluster found lineage $lineage again" + set rc false + } else { + set found($cluster,$lineage) 1 + } + exp_continue + } + timeout { + fail "sacctmgr add not responding" + } + eof { + wait + } + } + + log_user 1 + return $rc +} + +proc check_acct_associations_lft { } { + global sacctmgr number re_word_str + set rc true log_user 0 + log_debug "Sanity-Checking Associations" + + set clusters "" + + spawn $sacctmgr show cluster format=cluster,rpc -p + expect { + -re "($re_word_str)\\|($number)\\|" { + # 9984 == 23.02, the last version where lft/rgt matter + if { $expect_out(2,string) > 9984 } { + exp_continue + } + set clusters [ concat $clusters "," $expect_out(1,string) ] + exp_continue + } + } + # # Use sacctmgr to check associations # - spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt,cluster + spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt,cluster clusters="$clusters" expect { -re "($number)\\|($number)\\|($re_word_str)\\|" { # Here we are checking if we have duplicates and @@ -3726,7 +3788,6 @@ proc check_acct_associations { } { return $rc } - ################################################################ # # NAME From 72d149667f5a6f63a2823d3b9aab5c5f8e47bc8d Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Wed, 26 
Apr 2023 15:29:32 -0600 Subject: [PATCH 11/81] Testsuite - make a better cleanup for accounting tests before they start. --- testsuite/expect/test21.11 | 13 +++-- testsuite/expect/test21.13 | 12 ++--- testsuite/expect/test21.14 | 12 ++--- testsuite/expect/test21.15 | 3 +- testsuite/expect/test21.16 | 12 ++--- testsuite/expect/test21.17 | 12 ++--- testsuite/expect/test21.18 | 12 ++--- testsuite/expect/test21.19 | 12 ++--- testsuite/expect/test21.20 | 17 ++++--- testsuite/expect/test21.21 | 2 + testsuite/expect/test21.22 | 102 ++++++++++++++++++------------------- testsuite/expect/test21.23 | 1 + testsuite/expect/test21.24 | 12 ++--- testsuite/expect/test21.26 | 6 +++ testsuite/expect/test21.27 | 6 +++ testsuite/expect/test21.28 | 12 ++--- testsuite/expect/test21.29 | 12 ++--- testsuite/expect/test21.30 | 7 +-- testsuite/expect/test21.31 | 5 +- testsuite/expect/test21.32 | 12 ++--- testsuite/expect/test21.33 | 6 +++ testsuite/expect/test21.41 | 10 ++-- testsuite/expect/test21.8 | 2 + testsuite/expect/test22.1 | 2 + testsuite/expect/test37.1 | 2 + 25 files changed, 163 insertions(+), 141 deletions(-) diff --git a/testsuite/expect/test21.11 b/testsuite/expect/test21.11 index 2aefd6abe36..d884cd6f40c 100755 --- a/testsuite/expect/test21.11 +++ b/testsuite/expect/test21.11 @@ -81,13 +81,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_acct "" "$ta1,$ta2" -remove_cluster "$tc1,$tc2" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global sacctmgr ta1 ta2 tc1 tc2 acct @@ -96,6 +89,12 @@ proc cleanup {} { run_command "$sacctmgr -i delete qos $acct(DefaultQos)" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # # Add test Cluster # diff --git a/testsuite/expect/test21.13 b/testsuite/expect/test21.13 index b844365fe16..869ddcb46b4 100755 --- a/testsuite/expect/test21.13 +++ b/testsuite/expect/test21.13 @@ -88,12 +88,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global tc1 tc2 ta1 ta2 ta3 @@ -102,6 +96,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2" ""] { fail "Unable to add clusters ($tc1,$tc2)" diff --git a/testsuite/expect/test21.14 b/testsuite/expect/test21.14 index 9d9b02311a3..cbd01d22286 100755 --- a/testsuite/expect/test21.14 +++ b/testsuite/expect/test21.14 @@ -109,12 +109,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global ta1 ta2 ta3 tc1 tc2 @@ -123,6 +117,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2" } +# Make sure we have a clean system 
and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2" ""] { fail "Unable to add clusters ($tc1,$tc2)" diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15 index 0e46432b198..c651e82ec7b 100755 --- a/testsuite/expect/test21.15 +++ b/testsuite/expect/test21.15 @@ -108,8 +108,7 @@ proc cleanup {} { remove_qos "$dqos1" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" +cleanup if {$access_err != 0} { skip "Not authorized to perform this test" } diff --git a/testsuite/expect/test21.16 b/testsuite/expect/test21.16 index 41eaf5de4d2..6d73b7a99fc 100755 --- a/testsuite/expect/test21.16 +++ b/testsuite/expect/test21.16 @@ -95,12 +95,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global dqos1 ta1 ta2 ta3 tc1 tc2 tc3 tu1 tu2 tu3 tu4 tu5 @@ -110,6 +104,12 @@ proc cleanup {} { remove_qos "$dqos1" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2,$tc3" [array get clus_req]] { fail "Unable to add clusters ($tc1,$tc2,$tc3)" diff --git a/testsuite/expect/test21.17 b/testsuite/expect/test21.17 index d879cb68947..4f1b7b03851 100755 --- a/testsuite/expect/test21.17 +++ b/testsuite/expect/test21.17 @@ -93,12 +93,6 @@ if { [get_config_param -dbd "AllowNoDefAcct"] eq "Yes" } { set def_acct $ta1 } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global ta1 ta2 ta3 tc1 tc2 tc3 tu1 tu2 tu3 @@ -107,6 +101,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2,$tc3" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2,$tc3" [array get clus_req]] { fail "Unable to add clusters ($tc1,$tc2,$tc3)" diff --git a/testsuite/expect/test21.18 b/testsuite/expect/test21.18 index fd8bd8a3061..19f138d3ebf 100755 --- a/testsuite/expect/test21.18 +++ b/testsuite/expect/test21.18 @@ -107,12 +107,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global ta1 ta2 ta3 tc1 tc2 tc3 tu1 tu2 tu3 @@ -121,6 +115,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2,$tc3" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2,$tc3" [array get clus_req]] { fail "Unable to add clusters ($tc1,$tc2,$tc3)" diff --git a/testsuite/expect/test21.19 b/testsuite/expect/test21.19 index 49d8c1a8bb4..277d60dd073 100755 --- a/testsuite/expect/test21.19 +++ b/testsuite/expect/test21.19 @@ -111,12 +111,6 @@ if { 
[get_config_param -dbd "AllowNoDefAcct"] eq "Yes" } { set def_acct $nm1 } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global nm1 nm2 nm3 tc1 tc2 tc3 us1 us2 us3 @@ -125,6 +119,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2,$tc3" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2,$tc3" [array get clus_req]] { fail "Unable to add clusters ($tc1,$tc2,$tc3)" diff --git a/testsuite/expect/test21.20 b/testsuite/expect/test21.20 index 4ea22f142e7..a6929d57dd8 100755 --- a/testsuite/expect/test21.20 +++ b/testsuite/expect/test21.20 @@ -95,19 +95,20 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global ta1 ta2 ta3 tc1 tc2 tc3 tu1 tu2 tu3 qs1 qs2 qs3 - remove_user "" "" "$tu1,$tu2,$tu3" - remove_acct "" "$ta1,$ta2,$ta3" remove_qos "$qs1,$qs2,$qs3" remove_cluster "$tc1,$tc2,$tc3" + remove_user "" "" "$tu1,$tu2,$tu3" + remove_acct "" "$ta1,$ta2,$ta3" +} + +# Make sure we have a clean system and permission to do this work +cleanup + +if {$access_err != 0} { + skip "Not authorized to perform this test" } # Add cluster diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21 index 38c15eebd80..383879b56df 100755 --- a/testsuite/expect/test21.21 +++ b/testsuite/expect/test21.21 @@ -131,6 +131,8 @@ proc cleanup { } { run_command "$sacctmgr -i delete qos $test_qos" } +cleanup + set got_node 0 spawn $srun -N1 -t1 printenv SLURM_NODELIST expect { diff --git a/testsuite/expect/test21.22 b/testsuite/expect/test21.22 index 692b66f3ea1..3b1b0a147a5 100755 --- a/testsuite/expect/test21.22 +++ b/testsuite/expect/test21.22 @@ -197,57 +197,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -run_command "$bin_rm -f delete $file_in $file_in2 $file_in3" - -# -# Build input script file - to create original associations -# -exec echo "$clu - '$tc1':$class=$class1:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in -exec echo "$par - '$roo'" >>$file_in -exec echo "$acc - '$ta1':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in -exec echo "$acc - '$ta2':$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1" >>$file_in -exec echo "$par - '$ta1'" >>$file_in -exec echo "$acc - '$ta3':$dsc=$ds3:$org=$or3:$fs=$fs3:$gm=$gm3:$gc=$gc3:$gj=$gj3:$gn=$gn3:$gs=$gs3:$gw=$gw3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in -exec echo "$par - '$ta2'" >>$file_in -exec echo "$usr - '$tu1':$coo=$ta2:$dac=$ta2:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in -exec echo "$par - '$ta3'" >>$file_in -exec echo "$usr - 
'$tu2':$coo=$ta3:$dac=$ta3:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in - -# -# Second input file - to modify and add associations to the original -# -exec echo "$clu - '$tc1':$class=$class2" >>$file_in2 -exec echo "$par - '$roo'" >>$file_in2 -exec echo "$acc - '$ta1'" >>$file_in2 -exec echo "$acc - '$ta3':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in2 -exec echo "$par - '$ta1'" >>$file_in2 -exec echo "$acc - '$ta2'" >>$file_in2 -exec echo "$par - '$ta2'" >>$file_in2 -exec echo "$usr - '$tu3':$coo=$ta1,$ta2,$ta3:$dac=$ta2:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$ala" >>$file_in2 -exec echo "$par - '$ta3'" >>$file_in2 -exec echo "$usr - '$tu2':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1:$al=$alo" >>$file_in2 -exec echo "$usr - '$tu3':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in2 -exec echo "$par - '$ta1'" >>$file_in2 -exec echo "$usr - '$tu3':$dac=$ta1:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1" >>$file_in2 - -# -# Third input file - to replace all previous -# -exec echo "$clu - '$tc1':$class=$class3:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in3 -exec echo "$par - '$roo'" >>$file_in3 -exec echo "$acc - '$ta1':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 -exec echo "$acc - '$ta3':$dsc=$ds3:$org=$or3:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 -exec echo "$par - '$ta1'" >>$file_in3 -exec echo "$acc - '$ta3':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 -exec echo "$acc - '$ta2':$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1:$al=$alo" >>$file_in3 -exec echo "$usr - '$tu3':$coo=$ta1,$ta2,$ta3:$dac=$ta1:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in3 -exec echo "$par - '$ta2'" >>$file_in3 -exec echo "$usr - '$tu1':$coo=$ta2:$dac=$ta2:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in3 -exec echo "$usr - '$tu3':$dac=$ta2:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1" >>$file_in3 -exec echo "$par - '$ta3'" >>$file_in3 -exec echo "$usr - '$tu2':$coo=$ta3:$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 -exec echo "$usr - '$tu3':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 - # # Use sacctmgr to remove the test cluster # @@ -448,11 +397,60 @@ proc cleanup {} { } # Make sure we have a clean system and permission to do this work -_remove_cluster "$tc1" +cleanup if {$access_err != 0} { skip "Not authorized to perform this test" } +# +# Build input script file - to create original associations +# +exec echo "$clu - '$tc1':$class=$class1:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in +exec echo "$par 
- '$roo'" >>$file_in +exec echo "$acc - '$ta1':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in +exec echo "$acc - '$ta2':$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1" >>$file_in +exec echo "$par - '$ta1'" >>$file_in +exec echo "$acc - '$ta3':$dsc=$ds3:$org=$or3:$fs=$fs3:$gm=$gm3:$gc=$gc3:$gj=$gj3:$gn=$gn3:$gs=$gs3:$gw=$gw3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in +exec echo "$par - '$ta2'" >>$file_in +exec echo "$usr - '$tu1':$coo=$ta2:$dac=$ta2:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in +exec echo "$par - '$ta3'" >>$file_in +exec echo "$usr - '$tu2':$coo=$ta3:$dac=$ta3:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in + +# +# Second input file - to modify and add associations to the original +# +exec echo "$clu - '$tc1':$class=$class2" >>$file_in2 +exec echo "$par - '$roo'" >>$file_in2 +exec echo "$acc - '$ta1'" >>$file_in2 +exec echo "$acc - '$ta3':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in2 +exec echo "$par - '$ta1'" >>$file_in2 +exec echo "$acc - '$ta2'" >>$file_in2 +exec echo "$par - '$ta2'" >>$file_in2 +exec echo "$usr - '$tu3':$coo=$ta1,$ta2,$ta3:$dac=$ta2:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$ala" >>$file_in2 +exec echo "$par - '$ta3'" >>$file_in2 +exec echo "$usr - '$tu2':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1:$al=$alo" >>$file_in2 +exec echo "$usr - '$tu3':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in2 +exec echo "$par - '$ta1'" >>$file_in2 +exec echo "$usr - '$tu3':$dac=$ta1:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1" >>$file_in2 + +# +# Third input file - to replace all previous +# +exec echo "$clu - '$tc1':$class=$class3:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in3 +exec echo "$par - '$roo'" >>$file_in3 +exec echo "$acc - '$ta1':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$acc - '$ta3':$dsc=$ds3:$org=$or3:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$par - '$ta1'" >>$file_in3 +exec echo "$acc - '$ta3':$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$acc - '$ta2':$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1:$al=$alo" >>$file_in3 +exec echo "$usr - '$tu3':$coo=$ta1,$ta2,$ta3:$dac=$ta1:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in3 +exec echo "$par - '$ta2'" >>$file_in3 +exec echo "$usr - '$tu1':$coo=$ta2:$dac=$ta2:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in3 +exec echo "$usr - 
'$tu3':$dac=$ta2:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1" >>$file_in3 +exec echo "$par - '$ta3'" >>$file_in3 +exec echo "$usr - '$tu2':$coo=$ta3:$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 +exec echo "$usr - '$tu3':$dac=$ta3:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 + set matches 0 spawn $sacctmgr -i load $file_in expect { diff --git a/testsuite/expect/test21.23 b/testsuite/expect/test21.23 index a0f28d243ef..dac8f225eff 100755 --- a/testsuite/expect/test21.23 +++ b/testsuite/expect/test21.23 @@ -68,6 +68,7 @@ if {[get_admin_level] ne "Administrator"} { set accounting_storage_type [get_config_param "AccountingStorageType"] +cleanup # # Identify the user and his current default account diff --git a/testsuite/expect/test21.24 b/testsuite/expect/test21.24 index 08891273c01..746452da332 100755 --- a/testsuite/expect/test21.24 +++ b/testsuite/expect/test21.24 @@ -249,12 +249,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global qs2 ta2 ta3 ta4 tc1 tc2 tc3 tu1 tu2 tu3 tu4 @@ -264,6 +258,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2,$tc3" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add qos if [add_qos "$qs2" ""] { fail "Unable to add qos ($qs2)" diff --git a/testsuite/expect/test21.26 b/testsuite/expect/test21.26 index 97792b1e89e..b90e6cf2d24 100755 --- a/testsuite/expect/test21.26 +++ b/testsuite/expect/test21.26 @@ -91,6 +91,12 @@ proc cleanup {} { remove_qos "$qos1" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # # Add a test cluster for testing # diff --git a/testsuite/expect/test21.27 b/testsuite/expect/test21.27 index 38c88d04aba..bcbf741c4e2 100755 --- a/testsuite/expect/test21.27 +++ b/testsuite/expect/test21.27 @@ -68,6 +68,12 @@ proc cleanup {} { remove_cluster $tc2 } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # # Build test associations # diff --git a/testsuite/expect/test21.28 b/testsuite/expect/test21.28 index 9263b91ba81..efb57848eb8 100755 --- a/testsuite/expect/test21.28 +++ b/testsuite/expect/test21.28 @@ -79,12 +79,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# Make sure we have a clean system and permission to do this work -remove_cluster "$tc1,$tc2,$tc3" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global ta1 ta2 ta3 tc1 tc2 tc3 tu1 tu2 tu3 @@ -93,6 +87,12 @@ proc cleanup {} { remove_cluster "$tc1,$tc2,$tc3" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add cluster if [add_cluster "$tc1,$tc2,$tc3" [array get clus_req]] { fail "Unable to add clusters ($tc1,$tc2,$tc3)" diff --git a/testsuite/expect/test21.29 b/testsuite/expect/test21.29 
index e24514e7eb7..bda963e4b22 100755 --- a/testsuite/expect/test21.29 +++ b/testsuite/expect/test21.29 @@ -67,18 +67,18 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator.\nUse: sacctmgr mod user \$USER set admin=admin" } -# verify a clean QOS entity and permission to do this work -remove_qos "$qos_name" -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global qos_name remove_qos "$qos_name" } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add qos add_qos $qos_name [array get qos] diff --git a/testsuite/expect/test21.30 b/testsuite/expect/test21.30 index 1ca19eafaf9..80bbf6036ca 100755 --- a/testsuite/expect/test21.30 +++ b/testsuite/expect/test21.30 @@ -259,11 +259,8 @@ proc cleanup {} { } } -# Remove any vestigial accounts or qos -run_command -none "$sacctmgr -i delete qos $qostest" - -# Delete account -run_command -none "$sacctmgr -i delete account $acct" +# Make sure we have a clean system +cleanup # Gets user set user_name [get_my_user_name] diff --git a/testsuite/expect/test21.31 b/testsuite/expect/test21.31 index 3b2a47145ac..17cde73dde0 100755 --- a/testsuite/expect/test21.31 +++ b/testsuite/expect/test21.31 @@ -109,9 +109,8 @@ proc cleanup {} { remove_cluster $tc2 } -# Verify a clean system and permission to do this work -remove_cluster "$tc2" -remove_res "$sr1" +# Make sure we have a clean system and permission to do this work +cleanup if {$access_err != 0} { skip "Not authorized to perform this test" } diff --git a/testsuite/expect/test21.32 b/testsuite/expect/test21.32 index 2463e73f8ec..ed33db2596d 100755 --- a/testsuite/expect/test21.32 +++ b/testsuite/expect/test21.32 @@ -120,18 +120,18 @@ if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} skip "Not using accounting_storage/slurmdbd" } -# clean it up (and check to make sure we can do this test -remove_qos $qos_names_str -if {$access_err != 0} { - skip "Not authorized to perform this test" -} - proc cleanup {} { global qos_names_str remove_qos $qos_names_str } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + # Add a few QoS if [add_qos $qos_names_str ""] { fail "Unable to add QOS ($qos_names_str)" diff --git a/testsuite/expect/test21.33 b/testsuite/expect/test21.33 index f17e2a98fcf..8bd1808ce45 100755 --- a/testsuite/expect/test21.33 +++ b/testsuite/expect/test21.33 @@ -90,6 +90,12 @@ proc cleanup { } { remove_cluster $test_cluster } +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} + set added 0 spawn $sacctmgr add -i cluster $test_cluster expect { diff --git a/testsuite/expect/test21.41 b/testsuite/expect/test21.41 index c0961522db3..57cda5b3d84 100755 --- a/testsuite/expect/test21.41 +++ b/testsuite/expect/test21.41 @@ -236,9 +236,9 @@ proc cleanup {} { archive_load $sql_rem_clus1 archive_load $sql_rem_clus2 + remove_cluster $clusters remove_user $clusters $accounts $users remove_acct "" $accounts - remove_cluster $clusters exec $bin_rm -f $sql_in_clus1 $sql_in_clus2 $sql_rem_clus1 $sql_rem_clus2 } @@ -260,9 +260,11 @@ set user_req(wckey) $wckeys # # Remove previous test clusters, accounts and users # -remove_user $clusters $accounts $users -remove_acct "" $accounts -remove_cluster 
$clusters +# Make sure we have a clean system and permission to do this work +cleanup +if {$access_err != 0} { + skip "Not authorized to perform this test" +} # Add cluster if [add_cluster $cluster1 [array get clus_req]] { diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8 index 5017d6c0af6..fe2540088d9 100755 --- a/testsuite/expect/test21.8 +++ b/testsuite/expect/test21.8 @@ -84,6 +84,8 @@ proc cleanup {} { run_command "$sacctmgr -i delete qos $cluster(DefaultQos) $mod_cluster(DefaultQos)" } +cleanup + # # Create tmp QOS # diff --git a/testsuite/expect/test22.1 b/testsuite/expect/test22.1 index e59575877cc..7633a06f8d3 100755 --- a/testsuite/expect/test22.1 +++ b/testsuite/expect/test22.1 @@ -215,6 +215,8 @@ proc cleanup {} { remove_cluster "$cluster2" } +cleanup + # Add clusters if [add_cluster $cluster1 [array get clus_req]] { fail "Unable to add cluster 1" diff --git a/testsuite/expect/test37.1 b/testsuite/expect/test37.1 index dc46a916ba6..6cbe875f1ca 100755 --- a/testsuite/expect/test37.1 +++ b/testsuite/expect/test37.1 @@ -140,6 +140,8 @@ proc cleanup { } { set nothing 0 set matches 0 +cleanup + ######MAIN###### ##################################### # TEST: add federation with non-existant cluster(s) From 651f6e9075e2776d9fe7841a4c78e2b0de727681 Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Tue, 2 May 2023 09:43:29 -0600 Subject: [PATCH 12/81] Testsuite - fix bad variable reference. --- testsuite/expect/test21.9 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 index 8aa6908c8d1..53b77d1a778 100755 --- a/testsuite/expect/test21.9 +++ b/testsuite/expect/test21.9 @@ -88,6 +88,8 @@ proc cleanup {} { run_command "$sacctmgr -i delete qos $cluster(DefaultQos) $mod_cluster(DefaultQos)" } +cleanup + # # Create tmp QOS # @@ -106,7 +108,7 @@ expect { } } if {$qos_set != 1} { - fail "QOS ($dqos) was not created" + fail "QOS $cluster(DefaultQos) $mod_cluster(DefaultQos) was not created" } # From 4ae477a35b290ff626d9cc16d04cb93f2df733d4 Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Wed, 10 May 2023 10:52:40 -0600 Subject: [PATCH 13/81] Testsuite - Alter tests to put the qos as part of the clusters. This makes it so the default qos is accessible. 
Bug 16344 --- testsuite/expect/test21.15 | 1 + testsuite/expect/test21.8 | 1 + testsuite/expect/test21.9 | 1 + 3 files changed, 3 insertions(+) diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15 index c651e82ec7b..2c46e6775f8 100755 --- a/testsuite/expect/test21.15 +++ b/testsuite/expect/test21.15 @@ -83,6 +83,7 @@ array set user_req2 { set user_req2(cluster) $tc1 set user_req2(account) $ta1 set user_req2(defaultqos) $dqos1 +set user_req2(qos) $dqos1 set timeout 60 diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8 index fe2540088d9..2587c844cea 100755 --- a/testsuite/expect/test21.8 +++ b/testsuite/expect/test21.8 @@ -49,6 +49,7 @@ set cluster(MaxSubmitJobs) 400 set cluster(MaxNodes) 200 set cluster(MaxWall) 01:00:00 set cluster(DefaultQos) "${test_name}_qos_1" +set cluster(Qos) "${test_name}_qos_1,${test_name}_qos_2" array set mod_cluster {} set mod_cluster(Fairshare) 1375 diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 index 53b77d1a778..95c9b5786d6 100755 --- a/testsuite/expect/test21.9 +++ b/testsuite/expect/test21.9 @@ -51,6 +51,7 @@ set cluster(MaxSubmitJobs) 400 set cluster(MaxNodes) 200 set cluster(MaxWall) 01:00:00 set cluster(DefaultQos) "${test_name}_qos_1" +set cluster(Qos) "${test_name}_qos_1,${test_name}_qos_2" # Modified Cluster Limits array set mod_cluster {} From 6506811c490f200542d0b15b37a78621bb5ddc14 Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Tue, 16 May 2023 09:43:04 -0600 Subject: [PATCH 14/81] Testsuite - Fix issue when running with extern step. It could delay the start of the job so the reason is 'Prolog' instead of 'None'. If it is running it is running for this test. We shouldn't care about the reason. Bug 16344 --- testsuite/expect/inc21.21_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests index efb33185dcd..6d179f54a6a 100644 --- a/testsuite/expect/inc21.21_tests +++ b/testsuite/expect/inc21.21_tests @@ -205,7 +205,7 @@ sleep 10" incr pending exp_continue } - -re "R.None" { + -re "R." 
{ incr running exp_continue } From 0bd45992121dc04d5c78c54782c9c42dbbcf8c8e Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 5 Apr 2023 17:36:34 -0600 Subject: [PATCH 15/81] Testsuite - Remove Terminal writer in conftest.py Remove dependency on py.io.TerminalWriter for text coloring in conftest.py as it is no longer supported Bug 16203 --- testsuite/python/conftest.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/testsuite/python/conftest.py b/testsuite/python/conftest.py index 2d17ece04c5..04d87b7aafd 100644 --- a/testsuite/python/conftest.py +++ b/testsuite/python/conftest.py @@ -5,7 +5,6 @@ import logging import os import pathlib -from py.io import TerminalWriter import pwd import pytest import _pytest @@ -31,6 +30,14 @@ def pytest_addoption(parser): def color_log_level(level, **color_kwargs): + # Adapted from deprecated py.io TerminalWriter source + # https://py.readthedocs.io/en/latest/_modules/py/_io/terminalwriter.html + _esctable = dict(black=30, red=31, green=32, yellow=33, + blue=34, purple=35, cyan=36, white=37, + Black=40, Red=41, Green=42, Yellow=43, + Blue=44, Purple=45, Cyan=46, White=47, + bold=1, light=2, blink=5, invert=7) + for handler in logging.getLogger().handlers: if isinstance(handler, _pytest.logging.LogCaptureHandler): formatter = handler.formatter @@ -39,9 +46,17 @@ def color_log_level(level, **color_kwargs): formatted_levelname = levelname_fmt % { 'levelname': logging.getLevelName(level) } - colorized_formatted_levelname = TerminalWriter().markup( - formatted_levelname, **color_kwargs + + esc = [] + for option in color_kwargs: + esc.append(_esctable[option]) + + colorized_formatted_levelname = ( + ''.join(['\x1b[%sm' % cod for cod in esc]) + + formatted_levelname + + '\x1b[0m' ) + formatter._level_to_fmt_mapping[level] = formatter.LEVELNAME_FMT_REGEX.sub( colorized_formatted_levelname, formatter._fmt ) From 0e8b64ef2d6be721ece46c4fa069356f0c71f752 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Wed, 17 May 2023 17:32:46 +0200 Subject: [PATCH 16/81] Testsuite - Fix test38.11 ensuring srun uses --jobid To really start a step after the salloc we cannot use a raw run_command, but rather the expect native "send". As an alternative, we can use --jobid to ensure that steps are created in the right job. Actually, we don't even need salloc in this particular test.
Bug 16639 --- testsuite/expect/test38.11 | 42 ++++++++++++-------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/testsuite/expect/test38.11 b/testsuite/expect/test38.11 index 3d2954f9109..3e750fdf5cc 100755 --- a/testsuite/expect/test38.11 +++ b/testsuite/expect/test38.11 @@ -58,40 +58,24 @@ proc cleanup { } { cancel_job $het_job_id } -set timeout $max_job_delay -spawn $salloc --exclusive=user -t2 -N1 : -N1 -expect { - -re "job ($number) has been allocated resources" { - set het_job_id $expect_out(1,string) - reset_bash_prompt - exp_continue - } - -re "$test_prompt" { - #log_debug "Job initiated" - } - timeout { - fail "salloc: allocation not granted in $timeout seconds" - } - eof { - wait - } -} -if {$het_job_id == 0} { - fail "salloc failure" -} +set het_job_id [submit_job "-t2 -N1 : -N1 --wrap '$bin_sleep 30'"] +wait_for_job -fail $het_job_id "RUNNING" + +run_command -fail "$srun -N1 -n1 --jobid=${het_job_id} --het-group=0 mkdir -p $tmp_dir" +run_command -fail "$srun -N1 -n1 --jobid=${het_job_id} --het-group=1 mkdir -p $tmp_dir" -run_command -fail "$srun -N1 -n1 --het-group=0 mkdir -p $tmp_dir" -run_command -fail "$srun -N1 -n1 --het-group=1 mkdir -p $tmp_dir" +run_command -fail "$srun --jobid=${het_job_id} --het-group=0 printenv SLURM_JOB_ID" +run_command -fail "$srun --jobid=${het_job_id} --het-group=1 printenv SLURM_JOB_ID" run_command -fail "$sbcast -f --jobid=$het_job_id $srun $tmp_dir/file" run_command -fail "$sbcast -f --jobid=${het_job_id}+0 $srun $tmp_dir/file_comp0" run_command -fail "$sbcast -f --jobid=${het_job_id}+1 $srun $tmp_dir/file_comp1" -subtest {![run_command_status "$srun -N1 -n1 --het-group=0 ls $tmp_dir/file"]} "Verify main file is in node of component 0" -subtest {![run_command_status "$srun -N1 -n1 --het-group=1 ls $tmp_dir/file"]} "Verify main file is in node of component 1" -subtest {![run_command_status "$srun -N1 -n1 --het-group=0 ls $tmp_dir/file_comp0"]} "Verify file_comp0 is in node of component 0" -subtest {![run_command_status "$srun -N1 -n1 --het-group=1 ls $tmp_dir/file_comp1"]} "Verify file_comp1 is in node of component 1" +subtest {![run_command_status "$srun -N1 -n1 --jobid=${het_job_id} --het-group=0 ls $tmp_dir/file"]} "Verify main file is in node of component 0" +subtest {![run_command_status "$srun -N1 -n1 --jobid=${het_job_id} --het-group=1 ls $tmp_dir/file"]} "Verify main file is in node of component 1" +subtest {![run_command_status "$srun -N1 -n1 --jobid=${het_job_id} --het-group=0 ls $tmp_dir/file_comp0"]} "Verify file_comp0 is in node of component 0" +subtest {![run_command_status "$srun -N1 -n1 --jobid=${het_job_id} --het-group=1 ls $tmp_dir/file_comp1"]} "Verify file_comp1 is in node of component 1" # TODO: This fails with multiple_slurmd because $tmp_dir is shared -# subtest {[run_command_status -xfail "$srun -N1 -n1 --het-group=0 ls $tmp_dir/file_comp1"]} "Verify file_comp1 is not is not in node of component 0" -# subtest {[run_command_status -xfail "$srun -N1 -n1 --het-group=1 ls $tmp_dir/file_comp0"]} "Verify file_comp0 is not is not in node of component 1" +# subtest {[run_command_status -xfail "$srun -N1 -n1 --jobid=${het_job_id} --het-group=0 ls $tmp_dir/file_comp1"]} "Verify file_comp1 is not in node of component 0" +# subtest {[run_command_status -xfail "$srun -N1 -n1 --jobid=${het_job_id} --het-group=1 ls $tmp_dir/file_comp0"]} "Verify file_comp0 is not in node of component 1" From ced57aeb8d2a8d806428580025f035108c982997 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Fri,
12 May 2023 10:16:33 -0600 Subject: [PATCH 17/81] Testsuite - add test_144_6.py Test gres with no_consume Bug 16671 --- testsuite/python/tests/test_144_6.py | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 testsuite/python/tests/test_144_6.py diff --git a/testsuite/python/tests/test_144_6.py b/testsuite/python/tests/test_144_6.py new file mode 100644 index 00000000000..43f4181c5a3 --- /dev/null +++ b/testsuite/python/tests/test_144_6.py @@ -0,0 +1,46 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import atf +import pytest +import re + + +# Setup +@pytest.fixture(scope="module", autouse=True) +def setup(): + atf.require_auto_config('wants to add custom gres') + atf.require_config_parameter('SelectType', 'select/cons_tres') + atf.require_config_parameter('GresTypes', 'r1,r2') + atf.require_nodes(1, + [('Gres', 'r1:no_consume:1,r2:1'), ('CPUs', 2), ('RealMemory', 2)]) + atf.require_slurm_running() + + +def test_no_consume(): + """Test gres with no_consume""" + + # Get the last node only + no_consume_output = atf.run_command_output( + 'srun --gres=r1 scontrol show node -d').split('NodeName')[-1] + assert re.search('GresUsed=.*r1:0', no_consume_output) is not None, \ + 'Expect no_consume resources to not be consumed' + consume_output = atf.run_command_output( + 'srun --gres=r2 scontrol show node -d').split('NodeName')[-1] + assert re.search('GresUsed=.*r2:1', consume_output) is not None, \ + 'Expect consumable resource to be consumed' + + +def test_no_consume_parallel(): + """Test no_consume gres with parallel jobs""" + + job_id_1 = atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') + job_id_2 = atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') + atf.wait_for_job_state(job_id_1, 'RUNNING') + squeue = atf.run_command_output('squeue') + assert re.search( + f"{job_id_1}( +[^ ]+)( +[^ ]+)( +[^ ]+) +R", squeue) is not None, \ + 'Expect first job to be running' + assert re.search( + f"{job_id_2}( +[^ ]+)( +[^ ]+)( +[^ ]+) +R", squeue) is not None, \ + 'Expect second job to be running' From 32f78e2721ecc41a764142b37d77189acaaa56ea Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 17 May 2023 14:09:06 -0600 Subject: [PATCH 18/81] Testsuite - Add test_144_6 to README Bug 16671 --- testsuite/README | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/README b/testsuite/README index e9c6a5093f0..d00f3c2614b 100644 --- a/testsuite/README +++ b/testsuite/README @@ -1026,3 +1026,4 @@ test_143_1 Test switches with topology/tree + select/cons_tres test_144_# Testing of gres. 
================================ test_144_4 Test gres with file or type and --overlap +test_144_6 Test gres with no_consume flag From 9cf355d4b6d5bbf531f63266e0e0c5d447a16997 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Fri, 12 May 2023 16:22:43 -0600 Subject: [PATCH 19/81] Testsuite - modify test7.21 Change error message to correctly reflect failing condition Bug 16124 --- testsuite/expect/test7.21 | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/testsuite/expect/test7.21 b/testsuite/expect/test7.21 index 35066b2c99b..1c6b11b5580 100755 --- a/testsuite/expect/test7.21 +++ b/testsuite/expect/test7.21 @@ -37,12 +37,6 @@ set orig_spank_conf "$test_dir/orig_conf" set new_spank_conf "$test_dir/new_conf" set spank_out "$test_dir/spank.out" -# TODO: Remove this precondition check once 21.08 is no longer supported -regexp {(\d+).(\d+).(\S+)} [get_config_param SLURM_VERSION] - major minor release -if {$major < 22} { - skip "This test is disabled in Slurm versions older than 22.05 (see bug 11829 and 13967)" -} - if {![is_super_user]} { skip "This test must be run as SlurmUser" } @@ -174,7 +168,7 @@ expect { } } if {$matches != 4} { - fail "Local (srun) sbatch spank plugin failure ($matches != 2)" + fail "Local (srun) sbatch spank plugin failure ($matches != 4)" } spawn $bin_rm $spank_out From 0249923d3ed08e117b0321d464dcd10da0f5a2aa Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Fri, 19 May 2023 13:59:30 -0600 Subject: [PATCH 20/81] Testsuite - Add wait_for_job_state Wait for state RUNNING before doing the parallel assert check in test_144_6 Bug 16779 --- testsuite/python/tests/test_144_6.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/python/tests/test_144_6.py b/testsuite/python/tests/test_144_6.py index 43f4181c5a3..e432393a9dc 100644 --- a/testsuite/python/tests/test_144_6.py +++ b/testsuite/python/tests/test_144_6.py @@ -37,6 +37,7 @@ def test_no_consume_parallel(): job_id_1 = atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') job_id_2 = atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') atf.wait_for_job_state(job_id_1, 'RUNNING') + atf.wait_for_job_state(job_id_2, 'RUNNING') squeue = atf.run_command_output('squeue') assert re.search( f"{job_id_1}( +[^ ]+)( +[^ ]+)( +[^ ]+) +R", squeue) is not None, \ From b2fca0087d6b287bf221ce6d7ead5af773aaebb9 Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 17 May 2023 17:29:31 -0600 Subject: [PATCH 21/81] Testsuite - Update conftest tmp_path Update tmp_path permissions in pytest 6+ as newer versions of pytest have changed default permissions Bug 16568 --- testsuite/python/conftest.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/testsuite/python/conftest.py b/testsuite/python/conftest.py index 04d87b7aafd..1a9ac737648 100644 --- a/testsuite/python/conftest.py +++ b/testsuite/python/conftest.py @@ -15,6 +15,7 @@ sys.path.append(sys.path[0] + '/lib') import atf + # Add test description (docstring) as a junit property def pytest_itemcollected(item): node = item.obj @@ -78,20 +79,49 @@ def session_setup(request): color_log_level(logging.TRACE, purple=True, bold=True) +def update_tmp_path_exec_permissions(): + """ + For pytest versions 6+ the tmp path it uses no longer has + public exec permissions for dynamically created directories by default. + + This causes problems when trying to read temp files during tests as + users other than atf (ie slurm). The tests will fail with permission denied. 
+ + To fix this we check and add the x bit to the public group on tmp + directories so the files inside can be read. Adding just 'read' is + not enough. + + Bug 16568 + """ + + user_name = atf.get_user_name() + path = f"/tmp/pytest-of-{user_name}" + + if os.path.isdir(path): + os.chmod(path, 0o777) + for root, dirs, files in os.walk(path): + for d in dirs: + os.chmod(os.path.join(root, d), 0o777) + + +@pytest.fixture(scope="function", autouse=True) +def tmp_path_setup(request): + update_tmp_path_exec_permissions() + + @pytest.fixture(scope="module", autouse=True) def module_setup(request, tmp_path_factory): - atf.properties['slurm-started'] = False atf.properties['configurations-modified'] = set() atf.properties['accounting-database-modified'] = False atf.properties['orig-environment'] = dict(os.environ) - #print(f"properties = {atf.properties}") # Creating a module level tmp_path mimicking what tmp_path does name = request.node.name name = re.sub(r'[\W]', '_', name) name = name[:30] atf.module_tmp_path = tmp_path_factory.mktemp(name, numbered=True) + update_tmp_path_exec_permissions() # Module-level fixtures should run from within the module_tmp_path os.chdir(atf.module_tmp_path) @@ -111,11 +141,9 @@ def module_setup(request, tmp_path_factory): def module_teardown(): - failures = [] if atf.properties['auto-config']: - if atf.properties['slurm-started'] == True: # Cancel all jobs From 3fddd4430a1937cd619c1620c233df32e9e36d7c Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Tue, 23 May 2023 11:17:09 +0200 Subject: [PATCH 22/81] Testsuite - Fix run-tests avoiding error for common tests This is a fix of a regression introduced in b282e2b006. Bug 16262 --- testsuite/run-tests | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testsuite/run-tests b/testsuite/run-tests index 024abaf3cd4..dd1201f8412 100755 --- a/testsuite/run-tests +++ b/testsuite/run-tests @@ -230,7 +230,8 @@ sub run_unit_tests { } # Execute the unit test using the test driver - my $test_output = `make check TESTS='${test}' 2>&1`; + # Set SUBDIRS to nothing so it doesn't descend into dirs without tests + my $test_output = `make check TESTS='${test}' SUBDIRS= 2>&1`; $rc = $?
>> 8; my $test_status = 'Passed'; From 8dfca046238acbc43dec452a6af9f470c06bade6 Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Mon, 22 May 2023 16:20:35 -0600 Subject: [PATCH 23/81] Testsuite - atf.submit_job => atf.submit_job_sbatch refactor Bug 16795 --- testsuite/python/lib/atf.py | 2 +- testsuite/python/tests/test_105_1.py | 4 +-- testsuite/python/tests/test_105_2.py | 2 +- testsuite/python/tests/test_108_4.py | 2 +- testsuite/python/tests/test_113_2.py | 4 +-- testsuite/python/tests/test_116_12.py | 12 ++++---- testsuite/python/tests/test_116_14.py | 2 +- testsuite/python/tests/test_116_19.py | 2 +- testsuite/python/tests/test_116_2.py | 2 +- testsuite/python/tests/test_116_25.py | 2 +- testsuite/python/tests/test_116_27.py | 2 +- testsuite/python/tests/test_116_7.py | 2 +- testsuite/python/tests/test_121_1.py | 8 +++--- testsuite/python/tests/test_121_2.py | 4 +-- testsuite/python/tests/test_122_1.py | 2 +- testsuite/python/tests/test_122_2.py | 2 +- testsuite/python/tests/test_125_1.py | 6 ++-- testsuite/python/tests/test_126_1.py | 40 +++++++++++++-------------- testsuite/python/tests/test_128_1.py | 12 ++++---- testsuite/python/tests/test_130_1.py | 6 ++-- testsuite/python/tests/test_130_3.py | 6 ++-- testsuite/python/tests/test_132_1.py | 4 +-- testsuite/python/tests/test_136_1.py | 14 +++++----- testsuite/python/tests/test_137_1.py | 2 +- testsuite/python/tests/test_144_4.py | 2 +- testsuite/python/tests/test_144_6.py | 4 +-- 26 files changed, 75 insertions(+), 75 deletions(-) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index cc35b653fae..8bce7222777 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -1342,7 +1342,7 @@ def require_sudo_rights(): pytest.skip("This test requires the test user to have unprompted sudo privileges", allow_module_level=True) -def submit_job(sbatch_args="--wrap \"sleep 60\"", **run_command_kwargs): +def submit_job_sbatch(sbatch_args="--wrap \"sleep 60\"", **run_command_kwargs): """Submits a job using sbatch and returns the job id. The submitted job will automatically be cancelled when the test ends. 
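For orientation before the per-test hunks that follow: the refactor is purely mechanical, so every call site keeps its existing sbatch argument string and only the helper's name changes. A minimal sketch of the pattern, using illustrative options rather than values taken from any one test below:

    import atf  # testsuite/python/lib/atf.py, assuming it is on sys.path

    # Before the refactor (old name):
    #   job_id = atf.submit_job("-N1 --wrap \"sleep 60\"", fatal=True)
    # After the refactor the sbatch-specific helper is named explicitly:
    job_id = atf.submit_job_sbatch("-N1 --wrap \"sleep 60\"", fatal=True)
    atf.wait_for_job_state(job_id, "RUNNING", fatal=True)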
diff --git a/testsuite/python/tests/test_105_1.py b/testsuite/python/tests/test_105_1.py index 9e7253b595c..b9a60069f98 100644 --- a/testsuite/python/tests/test_105_1.py +++ b/testsuite/python/tests/test_105_1.py @@ -78,7 +78,7 @@ def test_gpus_per_cpu(init_gpu_vars): gpu_bind = 'closest' gpu_freq = 'medium' - job_id = atf.submit_job(f"--cpus-per-gpu={cpus_per_gpu} --gpu-bind={gpu_bind} --gpu-freq={gpu_freq} --gpus={gpu_count} --gpus-per-node={gpus_per_node} --gpus-per-task={gpus_per_task} --mem-per-gpu={memory_per_gpu} --nodes={node_count} --ntasks={task_count} -t1 --wrap \"true\"", fatal=True) + job_id = atf.submit_job_sbatch(f"--cpus-per-gpu={cpus_per_gpu} --gpu-bind={gpu_bind} --gpu-freq={gpu_freq} --gpus={gpu_count} --gpus-per-node={gpus_per_node} --gpus-per-task={gpus_per_task} --mem-per-gpu={memory_per_gpu} --nodes={node_count} --ntasks={task_count} -t1 --wrap \"true\"", fatal=True) job_dict = atf.get_job(job_id) assert job_dict['CpusPerTres'] == f"gres:gpu:{cpus_per_gpu}" @@ -94,7 +94,7 @@ def test_gpus_per_socket(init_gpu_vars): """Test a batch job with various gpu options including --gpus-per-socket""" gpus_per_socket = 1 - job_id = atf.submit_job(f"--cpus-per-gpu={cpus_per_gpu} --gpus-per-socket={gpus_per_socket} --sockets-per-node={sockets_per_node} --nodes={node_count} --ntasks={task_count} -t1 --wrap \"true\"", fatal=True) + job_id = atf.submit_job_sbatch(f"--cpus-per-gpu={cpus_per_gpu} --gpus-per-socket={gpus_per_socket} --sockets-per-node={sockets_per_node} --nodes={node_count} --ntasks={task_count} -t1 --wrap \"true\"", fatal=True) job_dict = atf.get_job(job_id) assert job_dict['TresPerSocket'] == f"gres:gpu:{gpus_per_socket}" diff --git a/testsuite/python/tests/test_105_2.py b/testsuite/python/tests/test_105_2.py index 3897cfa8e40..417f7c40b30 100644 --- a/testsuite/python/tests/test_105_2.py +++ b/testsuite/python/tests/test_105_2.py @@ -23,7 +23,7 @@ def test_hetjob(tmp_path): #SBATCH --cpus-per-task=1 --mem-per-cpu=6 --ntasks=1 -t1 $bin_sleep 300""") - leader_job_id = atf.submit_job(f"-t1 {file_in}", fatal=True) + leader_job_id = atf.submit_job_sbatch(f"-t1 {file_in}", fatal=True) jobs_dict = atf.get_jobs() # Verify details about leader job diff --git a/testsuite/python/tests/test_108_4.py b/testsuite/python/tests/test_108_4.py index 5f35480e107..42d5fc5cdb2 100644 --- a/testsuite/python/tests/test_108_4.py +++ b/testsuite/python/tests/test_108_4.py @@ -15,7 +15,7 @@ def setup(): @pytest.fixture(scope='module') def batch_job(): """Submit a batch job and wait for it to start running""" - job_id = atf.submit_job(fatal=True) + job_id = atf.submit_job_sbatch(fatal=True) atf.wait_for_job_state(job_id, 'RUNNING', fatal=True) return job_id diff --git a/testsuite/python/tests/test_113_2.py b/testsuite/python/tests/test_113_2.py index 070a0fa522d..2bb930311df 100644 --- a/testsuite/python/tests/test_113_2.py +++ b/testsuite/python/tests/test_113_2.py @@ -29,8 +29,8 @@ def queued_job(default_partition): if default_partition in node_partitions and node_dict['State'] == 'IDLE': total_cpus += node_dict['CPUTot'] - running_job_id = atf.submit_job(f"--output=/dev/null --error=/dev/null -n {total_cpus} --exclusive --wrap=\"sleep 600\"", fatal=True) - queued_job_id = atf.submit_job(f"--output=/dev/null --error=/dev/null -n {total_cpus} --exclusive --wrap=\"sleep 600\"", fatal=True) + running_job_id = atf.submit_job_sbatch(f"--output=/dev/null --error=/dev/null -n {total_cpus} --exclusive --wrap=\"sleep 600\"", fatal=True) + queued_job_id = atf.submit_job_sbatch(f"--output=/dev/null 
--error=/dev/null -n {total_cpus} --exclusive --wrap=\"sleep 600\"", fatal=True) return queued_job_id diff --git a/testsuite/python/tests/test_116_12.py b/testsuite/python/tests/test_116_12.py index 8bdc5675f39..681448957a0 100644 --- a/testsuite/python/tests/test_116_12.py +++ b/testsuite/python/tests/test_116_12.py @@ -156,7 +156,7 @@ def test_output_error_formatting(tmp_path): srun -O --output={file_out} true done""") os.chmod(file_in, 0o0777) - job_id = atf.submit_job(f"-N{node_count} --output /dev/null {str(file_in)}") + job_id = atf.submit_job_sbatch(f"-N{node_count} --output /dev/null {str(file_in)}") atf.wait_for_job_state(job_id, 'DONE') tmp_dir_list = os.listdir(tmp_path) for step in range(0,step_count): @@ -170,7 +170,7 @@ def test_output_error_formatting(tmp_path): srun -O --error={file_err} true done""") os.chmod(file_in, 0o0777) - job_id = atf.submit_job(f"-N{node_count} --output /dev/null {str(file_in)}") + job_id = atf.submit_job_sbatch(f"-N{node_count} --output /dev/null {str(file_in)}") atf.wait_for_job_state(job_id, 'DONE') tmp_dir_list = os.listdir(tmp_path) for step in range(0,step_count): @@ -227,7 +227,7 @@ def test_output_error_formatting(tmp_path): file_in = tmp_path / "file_in.A.input" atf.make_bash_script(file_in, f"""srun -O --output={file_out} hostname""") os.chmod(file_in, 0o0777) - job_id = atf.submit_job(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") + job_id = atf.submit_job_sbatch(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") atf.wait_for_job_state(job_id, "DONE") os.remove(file_in) result_out = fpc.get_tmp_file() @@ -236,7 +236,7 @@ def test_output_error_formatting(tmp_path): atf.make_bash_script(file_in, f"""srun -O --error={file_err} uid""") os.chmod(file_in, 0o0777) - job_id = atf.submit_job(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") + job_id = atf.submit_job_sbatch(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") atf.wait_for_job_state(job_id, "DONE") os.remove(file_in) result_err = fpc.get_tmp_file() @@ -250,7 +250,7 @@ def test_output_error_formatting(tmp_path): file_in = tmp_path / "file_in.A.a.input" atf.make_bash_script(file_in, f"""srun -O --output={file_out} hostname""") os.chmod(file_in, 0o0777) - job_id = atf.submit_job(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") + job_id = atf.submit_job_sbatch(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") atf.wait_for_job_state(job_id, "DONE") tmp_dir_list = os.listdir(tmp_path) for array_id in range(1,array_size+1): @@ -259,7 +259,7 @@ def test_output_error_formatting(tmp_path): fpc.remove_file(id_file) atf.make_bash_script(file_in, f"""srun -O --error={file_err} uid""") - job_id = atf.submit_job(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") + job_id = atf.submit_job_sbatch(f"-N1 --output=/dev/null --array=1-{array_size} {file_in}") atf.wait_for_job_state(job_id, "DONE") tmp_dir_list = os.listdir(tmp_path) for array_id in range(1,array_size+1): diff --git a/testsuite/python/tests/test_116_14.py b/testsuite/python/tests/test_116_14.py index 22cc9c51dad..97aac9027aa 100644 --- a/testsuite/python/tests/test_116_14.py +++ b/testsuite/python/tests/test_116_14.py @@ -31,7 +31,7 @@ def test_preserve_env(): srun -n{srun_tasks} -N{srun_nodes} printenv SLURM_NTASKS""") atf.run_command("srun printenv SLURM_NNODES") - job_id = atf.submit_job(f"-O --output={file_out} -N{num_nodes} -n{num_tasks} {file_in}") + job_id = atf.submit_job_sbatch(f"-O --output={file_out} -N{num_nodes} -n{num_tasks} {file_in}") 
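# Hedged sketch: the wait_for_job_state() call just below this point uses the
# library's default polling bounds. A bounded variant, with kwargs as used by
# other tests in this series (60 is an illustrative value, not from this file):
#     atf.wait_for_job_state(job_id, 'DONE', timeout=60, fatal=True)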
atf.wait_for_job_state(job_id, 'DONE') atf.wait_for_file(file_out) f = open(file_out, 'r') diff --git a/testsuite/python/tests/test_116_19.py b/testsuite/python/tests/test_116_19.py index e6ca9527e34..f201c84c18d 100644 --- a/testsuite/python/tests/test_116_19.py +++ b/testsuite/python/tests/test_116_19.py @@ -72,7 +72,7 @@ def test_user_limits(): ulimit -s {limit_stack} srun python3 {script_file}""") - job_id = atf.submit_job(f"--output={file_out} --error={file_err} {file_in}") + job_id = atf.submit_job_sbatch(f"--output={file_out} --error={file_err} {file_in}") atf.wait_for_job_state(job_id, 'DONE') f = open(file_out, 'r') line = f.readline() diff --git a/testsuite/python/tests/test_116_2.py b/testsuite/python/tests/test_116_2.py index 874bca95769..cf5cacb9abf 100644 --- a/testsuite/python/tests/test_116_2.py +++ b/testsuite/python/tests/test_116_2.py @@ -21,7 +21,7 @@ def test_account(): my_acct = 'MY_ACCT' qa_acct = 'QA_ACCT' os.environ["SLURM_ACCOUNT"] = "QA_ACCT" - job_id = atf.submit_job(f'--account={my_acct} --wrap="sleep 5"') + job_id = atf.submit_job_sbatch(f'--account={my_acct} --wrap="sleep 5"') assert job_id != 0, f'Batch submit failure' output = atf.run_command_output(f'scontrol show job {job_id}') assert re.search(f'Account={my_acct.lower()}', output), f'Account information not processed from sbatch' diff --git a/testsuite/python/tests/test_116_25.py b/testsuite/python/tests/test_116_25.py index c818ea21420..0b16edc6e48 100644 --- a/testsuite/python/tests/test_116_25.py +++ b/testsuite/python/tests/test_116_25.py @@ -26,7 +26,7 @@ def test_signal_forwarding(): file_out = atf.module_tmp_path / 'file_out' script_file = pathlib.Path(atf.properties['testsuite_python_lib']) / 'signal_forwarding_script.py' atf.make_bash_script(file_in, f"""srun --output={file_out} python3 {script_file}""") - job_id = atf.submit_job(f'{file_in}') + job_id = atf.submit_job_sbatch(f'{file_in}') for i in range(sig1_count): atf.run_command(f"scancel --signal {signal.SIGUSR1.value} {job_id}") diff --git a/testsuite/python/tests/test_116_27.py b/testsuite/python/tests/test_116_27.py index ee1f9162855..1549ee2c466 100644 --- a/testsuite/python/tests/test_116_27.py +++ b/testsuite/python/tests/test_116_27.py @@ -40,7 +40,7 @@ def test_mail_type_and_mail_user(mail_program_out): assert re.findall(rf"SLURM_JOB_ID={job_id} SLURM_JOB_USER={atf.properties[slurm_user]} SLURM_JOB_MAIL_TYPE=Began", output) is not None, "Start mail not sent" assert re.findall(rf"SLURM_JOB_ID={job_id} SLURM_JOB_USER={atf.properties[slurm_user]} SLURM_JOB_MAIL_TYPE=Ended", output) is not None, "End mail not sent" - job_id = atf.submit_job(f'--wrap="srun --mail-type=END --mail-user={atf.properties[slurm_user]} sleep 100"') + job_id = atf.submit_job_sbatch(f'--wrap="srun --mail-type=END --mail-user={atf.properties[slurm_user]} sleep 100"') atf.wait_for_job_state(job_id, 'RUNNING') atf.run_command(f"scancel {job_id}") output = atf.run_command_output(f"cat {mail_program_out}") diff --git a/testsuite/python/tests/test_116_7.py b/testsuite/python/tests/test_116_7.py index e0922148c35..ebcbc1edf14 100644 --- a/testsuite/python/tests/test_116_7.py +++ b/testsuite/python/tests/test_116_7.py @@ -39,7 +39,7 @@ def test_batch_multiple_concurrent_steps(): wait""") # Spawn a batch job with multiple steps in background - job_id = atf.submit_job(f"-O {job_mem_opt} -n{steps_submitted} --output={file_out} {file_in}") + job_id = atf.submit_job_sbatch(f"-O {job_mem_opt} -n{steps_submitted} --output={file_out} {file_in}") atf.wait_for_job_state(job_id, 
'RUNNING', fatal=True) # Check that all of the steps in background are in squeue at the same time within a time limit diff --git a/testsuite/python/tests/test_121_1.py b/testsuite/python/tests/test_121_1.py index 8bc6be43484..10881989130 100644 --- a/testsuite/python/tests/test_121_1.py +++ b/testsuite/python/tests/test_121_1.py @@ -86,7 +86,7 @@ def test_two_parallel_consumption_sbatch(mps_nodes, file_in_2a): file_out1 = atf.module_tmp_path / "output1" - job_id = atf.submit_job(f"--gres=mps:{job_mps} -w {mps_nodes[0]} -n1 -t1 -o {file_out1} {file_in_2a}") + job_id = atf.submit_job_sbatch(f"--gres=mps:{job_mps} -w {mps_nodes[0]} -n1 -t1 -o {file_out1} {file_in_2a}") assert job_id != 0, "Job failed to submit" atf.wait_for_job_state(job_id, 'DONE', timeout=15, fatal=True) atf.wait_for_file(file_out1) @@ -130,7 +130,7 @@ def test_three_parallel_consumption_sbatch(mps_nodes, file_in_1a): date """) - job_id = atf.submit_job(f"--gres=mps:{job_mps} -w {mps_nodes[0]} -c6 -n1 -t1 -o {file_out1} {file_in2}") + job_id = atf.submit_job_sbatch(f"--gres=mps:{job_mps} -w {mps_nodes[0]} -c6 -n1 -t1 -o {file_out1} {file_in2}") assert job_id != 0, "Job failed to submit" atf.wait_for_job_state(job_id, 'DONE', timeout=20, fatal=True) @@ -157,7 +157,7 @@ def test_consume_more_gresMps_than_allocated(mps_nodes, file_in_1a): srun --mem=0 --overlap --gres=mps:{step_mps2} {file_in_1a} """) - job_id = atf.submit_job(f"--gres=mps:{job_mps2} -w {mps_nodes[0]} -n1 -t1 -o {file_out1} {file_in2}") + job_id = atf.submit_job_sbatch(f"--gres=mps:{job_mps2} -w {mps_nodes[0]} -n1 -t1 -o {file_out1} {file_in2}") assert job_id != 0, "Job failed to submit" atf.wait_for_job_state(job_id, 'DONE', timeout=20, fatal=True) @@ -214,7 +214,7 @@ def test_gresGPU_gresMPS_GPU_sharing(mps_nodes): squeue --name=test_job --noheader --state=r --format=\"jobid=%i state=%T\" """) - job_id = atf.submit_job(f"--gres=gpu:1 -w {mps_nodes[0]} -n1 -t1 -o {file_out1} -J 'test_job' {file_in1}") + job_id = atf.submit_job_sbatch(f"--gres=gpu:1 -w {mps_nodes[0]} -n1 -t1 -o {file_out1} -J 'test_job' {file_in1}") assert job_id != 0, "Job failed to submit" atf.wait_for_job_state(job_id, 'DONE', timeout=35, fatal=True) diff --git a/testsuite/python/tests/test_121_2.py b/testsuite/python/tests/test_121_2.py index 7cfde350fa9..224c60c4ca6 100644 --- a/testsuite/python/tests/test_121_2.py +++ b/testsuite/python/tests/test_121_2.py @@ -68,7 +68,7 @@ def test_gres_mps_option_job(): scontrol -dd show job $SLURM_JOBID | grep mps exit 0""") - job_id1 = atf.submit_job(f"--gres=craynetwork:0 --gres=mps:{mps_fail_cnt} -N{node_count} -t1 -o {file_out1} -J 'test_job' {file_in}") + job_id1 = atf.submit_job_sbatch(f"--gres=craynetwork:0 --gres=mps:{mps_fail_cnt} -N{node_count} -t1 -o {file_out1} -J 'test_job' {file_in}") assert job_id1 != 0, "Job 1 failed to submit" atf.repeat_command_until(f"scontrol show job {job_id1}", lambda results: re.search(r"Reason=.*AssocMaxGRESPerJob", results['stdout']), fatal=True) @@ -79,6 +79,6 @@ def test_gres_mps_option_job(): assert re.search(r"JobState=PENDING", output), "Job state is bad (JobState != PENDING)" assert re.search(r"Reason=.*AssocMaxGRESPerJob", output), "Job state is bad (Reason != '.*AssocMaxGRESPerJob ')" - job_id2 = atf.submit_job(f"--account='test_mps_acct' --gres=craynetwork:0 --gres=mps:{mps_good_cnt} -N{node_count} -t1 -o {file_out2} -J 'test_job2' {file_in}") + job_id2 = atf.submit_job_sbatch(f"--account='test_mps_acct' --gres=craynetwork:0 --gres=mps:{mps_good_cnt} -N{node_count} -t1 -o {file_out2} -J 'test_job2' 
{file_in}") assert job_id2 != 0, "Job 2 failed to submit" assert atf.wait_for_job_state(job_id2, 'DONE', fatal=True) diff --git a/testsuite/python/tests/test_122_1.py b/testsuite/python/tests/test_122_1.py index 8b3490a2315..98de9e5ec01 100644 --- a/testsuite/python/tests/test_122_1.py +++ b/testsuite/python/tests/test_122_1.py @@ -17,7 +17,7 @@ def test_submit_and_cancel(): """Test basic submission and cancellation of job arrays""" # Submit a job array - job_id = atf.submit_job(f"-N 1 --array=0-{array_size - 1} --begin=midnight --output=/dev/null --wrap=\"sleep 10\"", fatal=True) + job_id = atf.submit_job_sbatch(f"-N 1 --array=0-{array_size - 1} --begin=midnight --output=/dev/null --wrap=\"sleep 10\"", fatal=True) # Verify the task count array_task_id = atf.get_job_parameter(job_id, 'ArrayTaskId') diff --git a/testsuite/python/tests/test_122_2.py b/testsuite/python/tests/test_122_2.py index e680ba3d8be..15ec72e3ea7 100644 --- a/testsuite/python/tests/test_122_2.py +++ b/testsuite/python/tests/test_122_2.py @@ -19,7 +19,7 @@ def test_job_array_with_gres(): """Test creating job array with gres requested""" output_pattern = f"{atf.module_tmp_path}/%A-%a.out" - job_id = atf.submit_job( + job_id = atf.submit_job_sbatch( f"--array=1-2 --gres=r1:1 --wrap='echo DONE' \ --output={output_pattern}") output_file_1 = f"{atf.module_tmp_path}/{job_id}-1.out" diff --git a/testsuite/python/tests/test_125_1.py b/testsuite/python/tests/test_125_1.py index ef2a4965542..63df7230c2b 100644 --- a/testsuite/python/tests/test_125_1.py +++ b/testsuite/python/tests/test_125_1.py @@ -55,10 +55,10 @@ def test_create_and_use_burst_buffer(tmp_path): """) # Submit a job to use the persisent burst buffer - bb_use_job_id = atf.submit_job(f"-N1 -t1 -o {bb_use_output} {bb_use_script}", fatal=True) + bb_use_job_id = atf.submit_job_sbatch(f"-N1 -t1 -o {bb_use_output} {bb_use_script}", fatal=True) # Submit a job to create the persisent burst buffer - bb_create_job_id = atf.submit_job(f"-N1 -t1 -o {bb_create_output} {bb_create_script}", fatal=True) + bb_create_job_id = atf.submit_job_sbatch(f"-N1 -t1 -o {bb_create_output} {bb_create_script}", fatal=True) # The burst buffer creation should complete first assert atf.wait_for_job_state(bb_create_job_id, 'DONE', timeout=660), f"Burst buffer creation job ({bb_create_job_id}) did not run" @@ -84,7 +84,7 @@ def test_remove_burst_buffer(tmp_path): """) # Submit a job to delete the persisent burst buffer - bb_delete_job_id = atf.submit_job(f"-N1 -t1 -o {bb_delete_output} {bb_delete_script}", fatal=True) + bb_delete_job_id = atf.submit_job_sbatch(f"-N1 -t1 -o {bb_delete_output} {bb_delete_script}", fatal=True) # The burst buffer deletion job should complete assert atf.wait_for_job_state(bb_delete_job_id, 'DONE', timeout=660), f"Burst buffer deletion job ({bb_delete_job_id}) did not run" diff --git a/testsuite/python/tests/test_126_1.py b/testsuite/python/tests/test_126_1.py index b50e6da729a..b3d0a8ffd76 100644 --- a/testsuite/python/tests/test_126_1.py +++ b/testsuite/python/tests/test_126_1.py @@ -158,19 +158,19 @@ def enforce_ALL(limit_name, flag, val_fail, val_pass): if limit_name == "AllowAccounts": custom_val_fail = '' # 1 Reject p1,p2 with no p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on p1,p2 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on 
p1,p2 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" # 2 Reject p1 no limit met - assert atf.submit_job(f"-p p1 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on p1 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on p1 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" # 3 Accept p2 no limit met ** This one and the first have a memory conflict - assert atf.submit_job(f"-p p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" + assert atf.submit_job_sbatch(f"-p p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ALL" # 4 Accept p1 with limit met - assert atf.submit_job(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ALL" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ALL" # 5 Accept p1,p2 with p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ALL" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ALL" def enforce_ANY(limit_name, flag, val_fail, val_pass): @@ -185,19 +185,19 @@ def enforce_ANY(limit_name, flag, val_fail, val_pass): if limit_name == "AllowAccounts": custom_val_fail = '' # 1 Accept p1,p2 with no p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 despite {limit_name} not met on p1 with EnforcePartLimits=ANY" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 despite {limit_name} not met on p1 with EnforcePartLimits=ANY" # 2 Reject p1 no limit met - assert atf.submit_job(f"-p p1 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on p1 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ANY" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_fail} --wrap \"hostname\" -o /dev/null") == 0, f"Job should fail on p1 due to {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ANY" # 3 Accept p2 no limit met ** This one and the first have a memory conflict - assert atf.submit_job(f"-p p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ANY" + assert atf.submit_job_sbatch(f"-p p2 {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=ANY" # 4 Accept p1 with limit met - assert 
atf.submit_job(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ANY" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ANY" # 5 Accept p1,p2 with p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ANY" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=ANY" def enforce_NO(limit_name, flag, val_fail, val_pass): @@ -212,23 +212,23 @@ def enforce_NO(limit_name, flag, val_fail, val_pass): if limit_name == "AllowAccounts": custom_val_fail = '' # 1 Submit -> pend on p1,p2 with bad p1 limit set -> complete with p1 limit met - job_id = atf.submit_job(f"-p p1,p2 {flag}{custom_val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) + job_id = atf.submit_job_sbatch(f"-p p1,p2 {flag}{custom_val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) satisfy_pending_job_limit(job_id, limit_name, custom_val_fail); assert atf.get_job_parameter(job_id, 'JobState', quiet=True) == 'COMPLETED', f"Job should submit, pend, then complete on p1,p2 with updated limit {limit_name} on partition p1 to passing valueswith EnforcePartLimits=NO" # 2 Submit -> pend on just p1 with bad limit, then complete with good limit - job_id = atf.submit_job(f"-p p1 {flag}{custom_val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) + job_id = atf.submit_job_sbatch(f"-p p1 {flag}{custom_val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) satisfy_pending_job_limit(job_id, limit_name, custom_val_fail); assert atf.get_job_parameter(job_id, 'JobState', quiet=True) == 'COMPLETED', f"Job should submit, pend, then complete on p1 with updated limit {limit_name} on partition p1 to passing values with EnforcePartLimits=NO" # 3 Submit -> complete on p2 with no limit set - assert atf.submit_job(f"-p p2, {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p2, {flag}{custom_val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=NO" # 4 Submit -> complete on p1 with p1 limit met - assert atf.submit_job(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" # 5 Submit -> complete on p1,p2 with p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the 
required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" def enforce_NO_QOS(limit_name, flag, val_fail, val_pass): @@ -237,7 +237,7 @@ def enforce_NO_QOS(limit_name, flag, val_fail, val_pass): val_fail = "bad_qos" # 1 Submit -> pend on p1,p2 with bad p1 limit set -> complete with p1 limit met - job_id = atf.submit_job(f"-p p1,p2 {flag}{val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) + job_id = atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) satisfy_pending_job_limit(job_id, limit_name, f"{val_pass},{val_fail}"); assert atf.get_job_parameter(job_id, 'JobState', quiet=True) == 'COMPLETED', f"Job should submit, pend, then complete on p1,p2 with updated limit {limit_name} on partition p1 to passing values with EnforcePartLimits=NO" @@ -245,7 +245,7 @@ def enforce_NO_QOS(limit_name, flag, val_fail, val_pass): atf.run_command(f"scontrol update partitionname=p1 {limit_name}={val_pass}", user=atf.properties['slurm-user'], fatal=True, quiet=True) # 2 Submit -> pend on just p1 with bad limit, then complete with good limit - job_id = atf.submit_job(f"-p p1 {flag}{val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) + job_id = atf.submit_job_sbatch(f"-p p1 {flag}{val_fail} --wrap \"hostname&\" -o /dev/null", timeout=1) satisfy_pending_job_limit(job_id, limit_name, f"{val_pass},{val_fail}"); assert atf.get_job_parameter(job_id, 'JobState', quiet=True) == 'COMPLETED', f"Job should submit, pend, then complete on p1 with updated limit {limit_name} on partition p1 to passing values with EnforcePartLimits=NO" @@ -253,13 +253,13 @@ def enforce_NO_QOS(limit_name, flag, val_fail, val_pass): atf.run_command(f"scontrol update partitionname=p1 {limit_name}={val_pass}", user=atf.properties['slurm-user'], fatal=True, quiet=True) # 3 Submit -> complete on p2 with no limit set - assert atf.submit_job(f"-p p2, {flag}{val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p2, {flag}{val_fail} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p2 despite {limit_name} limit not met on the required partition p1 with EnforcePartLimits=NO" # 4 Submit -> complete on p1 with p1 limit met - assert atf.submit_job(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p1 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" # 5 Submit -> complete on p1,p2 with p1 limit met - assert atf.submit_job(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" + assert atf.submit_job_sbatch(f"-p p1,p2 {flag}{val_pass} --wrap \"hostname\" -o /dev/null") != 0, f"Job should pass on p1,p2 due to {limit_name} limit met on the required partition p1 with EnforcePartLimits=NO" # Tests: diff --git a/testsuite/python/tests/test_128_1.py b/testsuite/python/tests/test_128_1.py index cce81e2429c..3022039ff94 100644 --- a/testsuite/python/tests/test_128_1.py +++ b/testsuite/python/tests/test_128_1.py @@ -46,9 +46,9 @@ def cancel_jobs(): def test_preempt_cancel(partition1, partition2, cancel_jobs): """Test preempt cancel""" - job_id1 = atf.submit_job(f"-N1
-t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) + job_id1 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) assert atf.wait_for_job_state(job_id1, 'RUNNING'), f"Job 1 ({job_id1}) did not start" - job_id2 = atf.submit_job(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) + job_id2 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) assert atf.wait_for_job_state(job_id2, 'RUNNING'), f"Job 2 ({job_id2}) did not start" assert atf.wait_for_job_state(job_id1, 'PREEMPTED'), f"Job 1 ({job_id1}) did not get preempted" @@ -58,9 +58,9 @@ def test_preempt_suspend(partition1, partition2, cancel_jobs): atf.run_command(f"scontrol update partitionname={partition1} preemptmode=suspend", user=atf.properties['slurm-user'], fatal=True) - job_id1 = atf.submit_job(f"-N1 -t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) + job_id1 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) assert atf.wait_for_job_state(job_id1, 'RUNNING'), f"Job 1 ({job_id1}) did not start" - job_id2 = atf.submit_job(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) + job_id2 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) assert atf.wait_for_job_state(job_id2, 'RUNNING'), f"Job 2 {job_id2} did not start" assert atf.wait_for_job_state(job_id1, 'SUSPENDED'), f"Job 1 ({job_id1}) did not get suspended" assert atf.wait_for_job_state(job_id2, 'DONE', timeout=60, poll_interval=1), f"Job 2 ({job_id2}) did not complete" @@ -72,9 +72,9 @@ def test_preempt_requeue(partition1, partition2, cancel_jobs): atf.run_command(f"scontrol update partitionname={partition1} preemptmode=requeue", user=atf.properties['slurm-user'], fatal=True) - job_id1 = atf.submit_job(f"-N1 -t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) + job_id1 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition1} --wrap \"sleep 120\"", fatal=True) assert atf.wait_for_job_state(job_id1, 'RUNNING'), f"Job 1 ({job_id1}) did not start" - job_id2 = atf.submit_job(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) + job_id2 = atf.submit_job_sbatch(f"-N1 -t1 -o /dev/null --exclusive -p {partition2} --wrap \"sleep 30\"", fatal=True) assert atf.wait_for_job_state(job_id2, 'RUNNING'), f"Job 2 ({job_id2}) did not start" assert atf.wait_for_job_state(job_id1, 'PENDING'), f"Job 1 ({job_id1}) did not return to pending" assert atf.wait_for_job_state(job_id2, 'DONE', timeout=60, poll_interval=1), f"Job 2 ({job_id2}) did not complete" diff --git a/testsuite/python/tests/test_130_1.py b/testsuite/python/tests/test_130_1.py index 2a8e0b5d533..0508bc09357 100644 --- a/testsuite/python/tests/test_130_1.py +++ b/testsuite/python/tests/test_130_1.py @@ -20,19 +20,19 @@ def section_description(): @pytest.fixture(scope='module') def first_job(): """Submit a non-held job""" - return atf.submit_job(fatal=True) + return atf.submit_job_sbatch(fatal=True) @pytest.fixture(scope='module') def second_job(): """Submit a second non-held job""" - return atf.submit_job(fatal=True) + return atf.submit_job_sbatch(fatal=True) @pytest.fixture(scope='module') def held_job(): """Submit a held job""" - return atf.submit_job("--hold --wrap=\"sleep 60\"", fatal=True) + return 
atf.submit_job_sbatch("--hold --wrap=\"sleep 60\"", fatal=True) @pytest.fixture(scope='module') diff --git a/testsuite/python/tests/test_130_3.py b/testsuite/python/tests/test_130_3.py index f1a2657ed53..b8a16c72f4d 100644 --- a/testsuite/python/tests/test_130_3.py +++ b/testsuite/python/tests/test_130_3.py @@ -20,19 +20,19 @@ def section_description(): @pytest.fixture(scope='module') def first_job(): """Submit a non-held job""" - return atf.submit_job(fatal=True) + return atf.submit_job_sbatch(fatal=True) @pytest.fixture(scope='module') def second_job(): """Submit a second non-held job""" - return atf.submit_job(fatal=True) + return atf.submit_job_sbatch(fatal=True) @pytest.fixture(scope='module') def held_job(): """Submit a held job""" - return atf.submit_job("--hold --wrap=\"sleep 60\"", fatal=True) + return atf.submit_job_sbatch("--hold --wrap=\"sleep 60\"", fatal=True) @pytest.fixture(scope='module') diff --git a/testsuite/python/tests/test_132_1.py b/testsuite/python/tests/test_132_1.py index da623416791..bcfb3e3a925 100644 --- a/testsuite/python/tests/test_132_1.py +++ b/testsuite/python/tests/test_132_1.py @@ -21,7 +21,7 @@ def test_prolog_success(tmp_path): atf.make_bash_script(prolog_script, f"touch {prolog_touched_file}") atf.set_config_parameter('Prolog', prolog_script) - job_id = atf.submit_job(fatal=True) + job_id = atf.submit_job_sbatch(fatal=True) # Verify that the prolog ran by checking for the file creation assert atf.wait_for_file(prolog_touched_file), f"File ({prolog_touched_file}) was not created" @@ -42,7 +42,7 @@ def test_prolog_failure(tmp_path): """) atf.set_config_parameter('Prolog', prolog_script) - job_id = atf.submit_job(fatal=True) + job_id = atf.submit_job_sbatch(fatal=True) # Verify that the prolog ran by checking for the file creation assert atf.wait_for_file(prolog_output_file), f"File ({prolog_output_file}) was not created" diff --git a/testsuite/python/tests/test_136_1.py b/testsuite/python/tests/test_136_1.py index 6ab7184a4b4..8ccc7d9770b 100644 --- a/testsuite/python/tests/test_136_1.py +++ b/testsuite/python/tests/test_136_1.py @@ -68,14 +68,14 @@ def test_job_denied(node_names): def test_node_state(node_names, teardown_jobs): """Verify that sinfo state is returned as 'alloc' when using all cpus except specialized cores""" - job_id = atf.submit_job(f"-w {node_names} -n{available_cores} --wrap='srun sleep 60'") + job_id = atf.submit_job_sbatch(f"-w {node_names} -n{available_cores} --wrap='srun sleep 60'") atf.wait_for_job_state(job_id, "RUNNING") assert len(re.findall("alloc", atf.run_command_output(f"sinfo -n {node_names} -h -N -o%t"))) == 2, "node states in sinfo should be both 'alloc'" atf.cancel_all_jobs(quiet=True) - job_id = atf.submit_job(f"-w {node_names} -n2 --wrap='srun sleep 60'") + job_id = atf.submit_job_sbatch(f"-w {node_names} -n2 --wrap='srun sleep 60'") atf.wait_for_job_state(job_id, "RUNNING") assert len(re.findall("alloc", atf.run_command_output(f"sinfo -n {node_names} -h -N -o%t"))) == 1, "one node state in sinfo should be 'alloc'" @@ -87,7 +87,7 @@ def test_core_spec_override(node_names): a job, you should be able to use the extra cores. 
""" - job_id = atf.submit_job(f"-w {node_names} --core-spec=0 -n{total_cores} --wrap='srun true'") + job_id = atf.submit_job_sbatch(f"-w {node_names} --core-spec=0 -n{total_cores} --wrap='srun true'") atf.wait_for_job_state(job_id, "DONE") output = int(re.findall( @@ -96,7 +96,7 @@ def test_core_spec_override(node_names): assert output == total_cores, f"--core-spec=0 should allow {total_cores} cores" - job_id = atf.submit_job(f"-w {node_names} --core-spec=0 --wrap='srun true'") + job_id = atf.submit_job_sbatch(f"-w {node_names} --core-spec=0 --wrap='srun true'") atf.wait_for_job_state(job_id, "DONE") output = int(re.findall( @@ -105,7 +105,7 @@ def test_core_spec_override(node_names): assert output == total_cores, f"Using --core-spec should imply --exclusive and using all cores" - job_id = atf.submit_job(f"-w {node_names} --core-spec=1 -n{total_cores - 2} --wrap='srun true'") + job_id = atf.submit_job_sbatch(f"-w {node_names} --core-spec=1 -n{total_cores - 2} --wrap='srun true'") atf.wait_for_job_state(job_id, "DONE") output = int(re.findall( @@ -114,7 +114,7 @@ def test_core_spec_override(node_names): assert output == total_cores - 2, f"--core-spec=1 should allocate all cores except 1 per node" - job_id = atf.submit_job(f"-w {node_names} --core-spec=2 -n{total_cores - 4} --wrap='srun true'") + job_id = atf.submit_job_sbatch(f"-w {node_names} --core-spec=2 -n{total_cores - 4} --wrap='srun true'") atf.wait_for_job_state(job_id, "DONE") output = int(re.findall( @@ -132,7 +132,7 @@ def test_thread_spec_override(node_names): a job, you should be able to use the extra threads. """ - job_id = atf.submit_job(f"-w {node_names} --thread-spec=1 --wrap='srun true'") + job_id = atf.submit_job_sbatch(f"-w {node_names} --thread-spec=1 --wrap='srun true'") atf.wait_for_job_state(job_id, "DONE") output = int(re.findall( diff --git a/testsuite/python/tests/test_137_1.py b/testsuite/python/tests/test_137_1.py index fc72e6e8623..ae6a9ed6666 100644 --- a/testsuite/python/tests/test_137_1.py +++ b/testsuite/python/tests/test_137_1.py @@ -92,7 +92,7 @@ def test_request_adds_new_ActiveFeature(our_node): make_rebooter_script(our_node, out_file) # Submit a job with inactive available feature 'f2', should trigger reboot - atf.submit_job(f"--wrap='true' -C f2 -w {our_node}", fatal=True) + atf.submit_job_sbatch(f"--wrap='true' -C f2 -w {our_node}", fatal=True) # Wait for output from rebooter script that indicates a successful reboot atf.repeat_command_until(f"cat {out_file}", lambda results: re.search(r'done', results['stdout']), fatal=True, timeout=30) diff --git a/testsuite/python/tests/test_144_4.py b/testsuite/python/tests/test_144_4.py index 7176b54f7c3..e2cfc542d54 100644 --- a/testsuite/python/tests/test_144_4.py +++ b/testsuite/python/tests/test_144_4.py @@ -51,7 +51,7 @@ def test_gres_overlap(): """Test gres without file and --overlap""" output_file = f"{atf.module_tmp_path}/out" - job_id = atf.submit_job(f"-wnode2 -N1 --gres=r2:1 \ + job_id = atf.submit_job_sbatch(f"-wnode2 -N1 --gres=r2:1 \ --output={output_file} --wrap='\ srun --overlap --gres=r2:1 hostname &\ srun --overlap --gres=r2:1 hostname &\ diff --git a/testsuite/python/tests/test_144_6.py b/testsuite/python/tests/test_144_6.py index e432393a9dc..6e5f79dca49 100644 --- a/testsuite/python/tests/test_144_6.py +++ b/testsuite/python/tests/test_144_6.py @@ -34,8 +34,8 @@ def test_no_consume(): def test_no_consume_parallel(): """Test no_consume gres with parallel jobs""" - job_id_1 = atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') - job_id_2 = 
atf.submit_job('--gres=r1 --mem=1 --wrap="sleep 20"') + job_id_1 = atf.submit_job_sbatch('--gres=r1 --mem=1 --wrap="sleep 20"') + job_id_2 = atf.submit_job_sbatch('--gres=r1 --mem=1 --wrap="sleep 20"') atf.wait_for_job_state(job_id_1, 'RUNNING') atf.wait_for_job_state(job_id_2, 'RUNNING') squeue = atf.run_command_output('squeue') From 470c08198d39cb583577e3dd30c25787fadebeb7 Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Mon, 22 May 2023 16:22:45 -0600 Subject: [PATCH 24/81] Testsuite - atf.alloc_job_id => atf.submit_job_salloc refactor Bug 16795 --- testsuite/python/lib/atf.py | 2 +- testsuite/python/tests/test_134_1.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index 8bce7222777..b30fd4ee480 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -1476,7 +1476,7 @@ def run_job_id(srun_args, **run_command_kwargs): # Return job id (command should not be interactive/shell) -def alloc_job_id(salloc_args, **run_command_kwargs): +def submit_job_salloc(salloc_args, **run_command_kwargs): """Submits a job using salloc and returns the job id. The submitted job will automatically be cancelled when the test ends. diff --git a/testsuite/python/tests/test_134_1.py b/testsuite/python/tests/test_134_1.py index 1402634e24e..206bfffcdae 100644 --- a/testsuite/python/tests/test_134_1.py +++ b/testsuite/python/tests/test_134_1.py @@ -19,7 +19,7 @@ def setup(): @pytest.fixture(scope='module') def salloc_noshell(): """Submit a backgrounded salloc job""" - job_id = atf.alloc_job_id("--verbose --no-shell", fatal=True) + job_id = atf.submit_job_salloc("--verbose --no-shell", fatal=True) atf.wait_for_job_state(job_id, 'RUNNING', fatal=True) return job_id From 6ce7e6dbc5165338065a2cd863558dc57abd637c Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Mon, 22 May 2023 16:24:31 -0600 Subject: [PATCH 25/81] Testsuite - atf.run_job_id => atf.submit_job_srun refactor Bug 16795 --- testsuite/python/lib/atf.py | 2 +- testsuite/python/tests/test_116_12.py | 10 +++++----- testsuite/python/tests/test_116_27.py | 4 ++-- testsuite/python/tests/test_116_41.py | 2 +- testsuite/python/tests/test_127_1.py | 2 +- testsuite/python/tests/test_136_2.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index b30fd4ee480..e94acdc56c6 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -1451,7 +1451,7 @@ def run_job_error(srun_args, **run_command_kwargs): # Return job id -def run_job_id(srun_args, **run_command_kwargs): +def submit_job_srun(srun_args, **run_command_kwargs): """Runs a job using srun and returns the job id. 
This function obtains the job id by adding the -v option to srun diff --git a/testsuite/python/tests/test_116_12.py b/testsuite/python/tests/test_116_12.py index 681448957a0..190c0859daa 100644 --- a/testsuite/python/tests/test_116_12.py +++ b/testsuite/python/tests/test_116_12.py @@ -71,13 +71,13 @@ def test_output_error_formatting(tmp_path): # Test %j puts the job id in the file names file_out = fpc.create_file_path("j") - job_id = atf.run_job_id(f"--output={file_out} -N1 -O id") + job_id = atf.submit_job_srun(f"--output={file_out} -N1 -O id") file_out = fpc.get_tmp_file() assert re.search(str(job_id), file_out) is not None, f"%j: Job id ({job_id}) was not in file name ({file_out})" fpc.remove_file(file_out) file_err = fpc.create_file_path("j", ERROR_TYPE) - job_id = atf.run_job_id(f"--error={file_err} -N1 -O uid") + job_id = atf.submit_job_srun(f"--error={file_err} -N1 -O uid") file_err = fpc.get_tmp_file() assert re.search(str(job_id), file_err) is not None, f"%j: Job id ({job_id}) was not in file name ({file_err})" fpc.remove_file(file_err) @@ -183,14 +183,14 @@ def test_output_error_formatting(tmp_path): job_command = "uid" file_out = fpc.create_file_path("x") file_err = fpc.create_file_path("x", ERROR_TYPE) - job_id = atf.run_job_id(f"--output={file_out} {job_command}") + job_id = atf.submit_job_srun(f"--output={file_out} {job_command}") job_name = atf.get_job_parameter(job_id, "JobName") assert job_command == job_name, f"%x: Job command ({job_command}) is not the same as the JobName ({job_name})" result_out = fpc.create_file(job_command) assert result_out in os.listdir(tmp_path), f"%x: Output file ({result_out}) was not created" fpc.remove_file(result_out) - job_id = atf.run_job_id(f"--error={file_err} {job_command}") + job_id = atf.submit_job_srun(f"--error={file_err} {job_command}") job_name = atf.get_job_parameter(job_id, "JobName") assert job_command == job_name, f"%x: Job command ({job_command}) is not the same as the JobName ({job_name})" result_err = fpc.create_file(job_command, ERROR_TYPE) @@ -210,7 +210,7 @@ def test_output_error_formatting(tmp_path): assert re.search(node_host_name, result_out) is not None, f"%N: Output file ({result_out}) does not contain NodeHostName ({node_host_name})" fpc.remove_file(result_out) - job_id = atf.run_job_id(f"--error={file_err} true") + job_id = atf.submit_job_srun(f"--error={file_err} true") node_name = atf.get_job_parameter(job_id, "NodeList") node_host_name = atf.get_node_parameter(node_name, "NodeHostName") node_addr = atf.get_node_parameter(node_name, "NodeAddr") diff --git a/testsuite/python/tests/test_116_27.py b/testsuite/python/tests/test_116_27.py index 1549ee2c466..0e0ad6d3788 100644 --- a/testsuite/python/tests/test_116_27.py +++ b/testsuite/python/tests/test_116_27.py @@ -34,7 +34,7 @@ def test_mail_type_and_mail_user(mail_program_out): """Test of mail options (--mail-type and --mail-user options).""" slurm_user = 'slurm-user' - job_id = atf.run_job_id(f"--mail-type=all --mail-user={atf.properties[slurm_user]} id") + job_id = atf.submit_job_srun(f"--mail-type=all --mail-user={atf.properties[slurm_user]} id") atf.wait_for_file(mail_program_out, fatal=True) output = atf.run_command_output(f"cat {mail_program_out}") assert re.findall(rf"SLURM_JOB_ID={job_id} SLURM_JOB_USER={atf.properties[slurm_user]} SLURM_JOB_MAIL_TYPE=Began", output) is not None, "Start mail not sent" @@ -46,7 +46,7 @@ def test_mail_type_and_mail_user(mail_program_out): output = atf.run_command_output(f"cat {mail_program_out}") assert 
re.findall(rf"SLURM_JOB_ID=\d+ SLURM_JOB_USER={atf.properties[slurm_user]} SLURM_JOB_MAIL_TYPE=Ended", output) is not None, "End mail not sent after job was canceled" - job_id = atf.run_job_id(f"-t1 --mail-type=ALL,TIME_LIMIT,TIME_LIMIT_90,TIME_LIMIT_80,TIME_LIMIT_50 --mail-user={atf.properties[slurm_user]} sleep 300", timeout=120, xfail=True) + job_id = atf.submit_job_srun(f"-t1 --mail-type=ALL,TIME_LIMIT,TIME_LIMIT_90,TIME_LIMIT_80,TIME_LIMIT_50 --mail-user={atf.properties[slurm_user]} sleep 300", timeout=120, xfail=True) time.sleep(5) output = atf.run_command_output(f"cat {mail_program_out}") assert re.findall(rf"SLURM_JOB_ID={job_id} SLURM_JOB_USER={atf.properties[slurm_user]} SLURM_JOB_MAIL_TYPE=Began", output) is not None, "Start mail not sent for timeout test" diff --git a/testsuite/python/tests/test_116_41.py b/testsuite/python/tests/test_116_41.py index 0617b6ab771..354eedebdf2 100644 --- a/testsuite/python/tests/test_116_41.py +++ b/testsuite/python/tests/test_116_41.py @@ -15,6 +15,6 @@ def test_job_name(): # Compare the job name given too the JobName field in scontrol job_name = "AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMM" - job_id = atf.run_job_id(f"--job-name={job_name} true") + job_id = atf.submit_job_srun(f"--job-name={job_name} true") job_param_name = atf.get_job_parameter(job_id, "JobName") assert job_name == job_param_name diff --git a/testsuite/python/tests/test_127_1.py b/testsuite/python/tests/test_127_1.py index 3882335d8d7..4dab3f97fd8 100644 --- a/testsuite/python/tests/test_127_1.py +++ b/testsuite/python/tests/test_127_1.py @@ -42,7 +42,7 @@ def test_epilog(tmp_path): atf.set_config_parameter('Epilog', epilog) # Verify that the epilog ran by checking for the file creation - job_id = atf.run_job_id(f"-t1 true", fatal=True) + job_id = atf.submit_job_srun(f"-t1 true", fatal=True) # TODO: Temporary debug mechanics to troubleshoot bug 14466 (remove once fixed) global srun_ran_successfully diff --git a/testsuite/python/tests/test_136_2.py b/testsuite/python/tests/test_136_2.py index 90f5e1058b7..f50575e45e4 100644 --- a/testsuite/python/tests/test_136_2.py +++ b/testsuite/python/tests/test_136_2.py @@ -57,7 +57,7 @@ def create_cpu_list(node): def test_job_submit(node_name): """Verify a job requesting a proper number of cpus is submitted with CPUSpecList plugin enabled""" - job_id = atf.run_job_id(f"-w {node_name} -N1 -n{available_cores} true") + job_id = atf.submit_job_srun(f"-w {node_name} -N1 -n{available_cores} true") output = atf.run_command_output(f"scontrol show job {job_id} -dd | grep CPU_IDs= | awk '{{print $2}}' | sed 's/^.*CPU_IDs=//'") cpu_spec_list = atf.get_node_parameter(node_name, 'CPUSpecList') From 168442878d63b8708d2830c3f75e3b6e13ce5dff Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Fri, 19 May 2023 15:29:37 -0600 Subject: [PATCH 26/81] Testsuite - Add generic sumbit_job to atf.py Add generic helper method to submit jobs with the given command and arguments, returning the job id. Bug 16795 --- testsuite/python/lib/atf.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index e94acdc56c6..af3f0b6134e 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -1499,6 +1499,37 @@ def submit_job_salloc(salloc_args, **run_command_kwargs): return 0 +# Return job id +def submit_job(command, job_param, job, *, wrap_job=True, **run_command_kwargs): + """Submits a job using given command and returns the job id. 
+ + Args*: + command (string): The command to submit the job (salloc, srun, sbatch). + job_param (string): The arguments to the job. + job (string): The command or job file to be executed. + wrap_job (boolean): If job needs to be wrapped when command is sbatch. + + * run_command arguments are also accepted (e.g. fatal) and will be supplied + to the underlying job_id and subsequent run_command call. + + Returns: The job id. + """ + + # Make sure command is a legal command to run a job + assert command in ["salloc", "srun", "sbatch"], \ + f"Invalid command '{command}'. Should be salloc, srun, or sbatch." + + if command == "salloc": + return submit_job_salloc(f"{job_param} {job}", **run_command_kwargs) + elif command == "srun": + return submit_job_srun(f"{job_param} {job}", **run_command_kwargs) + elif command == "sbatch": + # If the job should be wrapped, do so before submitting + if wrap_job: + job = f"--wrap '{job}'" + return submit_job_sbatch(f"{job_param} {job}", **run_command_kwargs) + + def run_job_nodes(srun_args, **run_command_kwargs): """Runs a job using srun and returns the allocated node list. From 437134a78d52728f0b3753cceb87252a084fe3d4 Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Wed, 24 May 2023 01:24:20 -0600 Subject: [PATCH 27/81] Testsuite - Standardize atf.submit_job_srun return code behavior atf.submit_job_srun now returns 0 instead of None, matching atf.submit_job_salloc and atf.submit_job_sbatch. Bug 16795 --- testsuite/python/lib/atf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index af3f0b6134e..baf89ab50ca 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -1473,6 +1473,8 @@ def submit_job_srun(srun_args, **run_command_kwargs): if match := re.search(r"jobid (\d+)", results['stderr']): return int(match.group(1)) + else: + return 0 # Return job id (command should not be interactive/shell) From 8d64122e3c559274be1a942ae5f700046ccca4d1 Mon Sep 17 00:00:00 2001 From: Danny Auble Date: Wed, 24 May 2023 15:27:00 -0600 Subject: [PATCH 28/81] Testsuite - Remove double add of account and user. This also adds a cleanup to start removing vestigial associations, and moves all the procs to the top of the file instead of the middle. --- testsuite/expect/test12.4 | 108 ++++++++++---------------------------- 1 file changed, 28 insertions(+), 80 deletions(-) diff --git a/testsuite/expect/test12.4 b/testsuite/expect/test12.4 index 9fbe21303f4..1c4c097b830 100755 --- a/testsuite/expect/test12.4 +++ b/testsuite/expect/test12.4 @@ -31,85 +31,6 @@ source ./globals set test_acct "${test_name}_acct" set timeout 60 -# -# Check accounting config and bail if not found. -# -if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} { - skip "This test can't be run without a usable AccountStorageType" -} - -if {[get_admin_level] ne "Administrator"} { - skip "This test can't be run without being an Accounting administrator.
Use: sacctmgr mod user \$USER set admin=admin" -} - -proc cleanup {} { - global sacctmgr test_acct - - run_command "$sacctmgr -i delete account $test_acct" -} - -set accounting_storage_type [get_config_param "AccountingStorageType"] - -set cluster [get_config_param "ClusterName"] -# -# Identify the user and his group -# -set user_name [get_my_user_name] -set user_gid [get_my_gid] - -# -# Use sacctmgr to add an account -# -set aamatches 0 -spawn $sacctmgr -i add account $test_acct cluster=$cluster -expect { - -re "Adding Account" { - incr aamatches - exp_continue - } - -re "Nothing new added" { - log_warn "Vestigial account $test_acct found" - incr aamatches - exp_continue - } - timeout { - fail "sacctmgr add not responding" - } - eof { - wait - } -} -if {$aamatches != 1} { - fail "sacctmgr had a problem adding account" -} - -# -# Add self to this new account -# -spawn $sacctmgr -i create user name=$user_name account=$test_acct cluster=$cluster -expect { - timeout { - fail "sacctmgr add not responding" - } - eof { - wait - } -} - - -# -# Spawn a job via srun using this account -# -set output [run_command_output -fail "$srun -N1 -v --account=$test_acct $bin_id"] -if {![regexp "launching StepId=($number)\.$re_word_str" $output - job_id]} { - fail "Did not get srun job id" -} -subtest {[get_job_param $job_id "Account"] eq $test_acct} "Verify srun job is using the specified account" - -if {$accounting_storage_type eq "accounting_storage/slurmdbd"} { - sleep 12 -} - ################################################################ # # Proc: sacct_job @@ -416,7 +337,34 @@ proc sacct_vargs { soption vargs job_id} { return $matches } } -################################################################ + +proc cleanup {} { + global sacctmgr test_acct + + run_command "$sacctmgr -i delete account $test_acct" +} + +# +# Check accounting config and bail if not found. +# +if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} { + skip "This test can't be run without a usable AccountStorageType" +} + +if {[get_admin_level] ne "Administrator"} { + skip "This test can't be run without being an Accounting administrator. Use: sacctmgr mod user \$USER set admin=admin" +} + +cleanup + +set accounting_storage_type [get_config_param "AccountingStorageType"] + +set cluster [get_config_param "ClusterName"] +# +# Identify the user and his group +# +set user_name [get_my_user_name] +set user_gid [get_my_gid] # # Use sacctmgr to add an account From 5aa84b2ae835364176e58accc3d72089555a6ef9 Mon Sep 17 00:00:00 2001 From: Jonathan de Gaston Date: Thu, 25 May 2023 13:46:26 -0600 Subject: [PATCH 29/81] Testsuite - Add whereami.c to atf Update atf.py to include whereami.c and require_whereami() Bug 16781 --- testsuite/python/lib/atf.py | 35 ++++++++++++ testsuite/python/scripts/whereami.c | 82 +++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 testsuite/python/scripts/whereami.c diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index baf89ab50ca..87a367babe3 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -875,6 +875,40 @@ def remove_config_parameter_value(name, value, source='slurm'): set_config_parameter(name, None, source=source) +def require_whereami(): + """Compiles the whereami.c program to be used by tests + + This function installs the whereami program. To get the + correct output, TaskPlugin is required in the slurm.conf + file before slurm starts up. 
+    ex: TaskPlugin=task/cray_aries,task/cgroup,task/affinity
+
+    The file will be installed in the testsuite/python/scripts
+    directory, where the whereami.c file is located.
+
+    Examples:
+    >>> atf.require_whereami()
+    >>> print('\nwhereami is located at', atf.properties['whereami'])
+    >>> output = atf.run_command(f"srun {atf.properties['whereami']}",
+    >>>                          user=atf.properties['slurm-user'])
+    """
+    require_config_parameter("TaskPlugin",
+                             "task/cray_aries,task/cgroup,task/affinity")
+    # If the file already exists and we don't need to recompile
+    dest_file = f"{properties['testsuite_scripts_dir']}/whereami"
+    if os.path.isfile(dest_file):
+        properties['whereami'] = dest_file
+        return
+
+    source_file = f"{properties['testsuite_scripts_dir']}/whereami.c"
+    if not os.path.isfile(source_file):
+        pytest.fail('Could not find whereami.c!')
+
+    run_command(f"gcc {source_file} -o {dest_file}", fatal=True,
+                user=properties['slurm-user'])
+    properties['whereami'] = dest_file
+
+
 def require_config_parameter(parameter_name, parameter_value, condition=None,
                              source='slurm', skip_message=None):
     """Ensures that a configuration parameter has the required value.

@@ -2443,6 +2477,7 @@ def _note(message, *args, **kwargs):
 properties['slurm-source-dir'] = str(pathlib.Path(__file__).resolve().parents[3])
 properties['slurm-build-dir'] = properties['slurm-source-dir']
 properties['slurm-prefix'] = '/usr/local'
+properties['testsuite_scripts_dir'] = properties['testsuite_base_dir'] + '/python/scripts'

 # Override directory properties with values from testsuite.conf file
 testsuite_config = {}
diff --git a/testsuite/python/scripts/whereami.c b/testsuite/python/scripts/whereami.c
new file mode 100644
index 00000000000..53f992821cc
--- /dev/null
+++ b/testsuite/python/scripts/whereami.c
@@ -0,0 +1,82 @@
+/*****************************************************************************\
+ *  test1.91.prog.c - Simple test program for SLURM regression test1.91.
+ *  Reports SLURM task ID and the CPU mask,
+ *  similar functionality to "taskset" command
+ *****************************************************************************
+ *  Copyright (C) 2005 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette
+ *  CODE-OCEC-09-009. All rights reserved.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <https://slurm.schedmd.com/>.
+ *  Please also read the included file: DISCLAIMER.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/ +#define _GNU_SOURCE +#define __USE_GNU +#include +#include +#include +#include +#include + +static char *_get_cpu_bindings() +{ + FILE *cpuinfo = fopen("/proc/self/status", "rb"); + char *line = 0; + size_t size = 0; + char *cpus = calloc(1024, sizeof(char)); + while(getdelim(&line, &size, '\n', cpuinfo) != -1) { + if (strstr(line, "Cpus_")) { + char *end = strstr(line, "\n"); + if (end) + *end = '\0'; + sprintf(cpus + strlen(cpus), "%s%s", line, (cpus[0]) ? "" : "\t"); + } + } + free(line); + fclose(cpuinfo); + return cpus; +} + + +int main (int argc, char **argv) +{ + char *task_str; + char *node_name; + int task_id; + + /* On POE systems, MP_CHILD is equivalent to SLURM_PROCID */ + if (((task_str = getenv("SLURM_PROCID")) == NULL) && + ((task_str = getenv("MP_CHILD")) == NULL)) { + fprintf(stderr, "ERROR: getenv(SLURM_PROCID) failed\n"); + exit(1); + } + + node_name = getenv("SLURMD_NODENAME"); + task_id = atoi(task_str); + printf("%4d %s - %s\n", task_id, node_name, _get_cpu_bindings()); + + + if (argc > 1) { + int sleep_time = strtol(argv[1] ,0, 10); + //printf("sleeping %d seconds\n", sleep_time); + fflush(stdout); + sleep(sleep_time); + } + exit(0); +} From 64b14a1bdd168053be8b2bbe002b73a2ff4e2512 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 26 May 2023 19:16:43 +0200 Subject: [PATCH 30/81] Testsuite - Improve atf.py increasing timeouts for DB commands Sometimes the default timeout of 60s is just not enough. Signed-off-by: Tom Johns --- testsuite/python/lib/atf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index 87a367babe3..77546ff6130 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -26,6 +26,7 @@ default_command_timeout = 60 default_polling_timeout = 15 +default_sql_cmd_timeout = 120 def node_range_to_list(node_expression): @@ -2153,7 +2154,7 @@ def backup_accounting_database(): else: mysqldump_command = f"{mysqldump_path} {mysql_options} {database_name} > {sql_dump_file}" - run_command(mysqldump_command, fatal=True, quiet=True) + run_command(mysqldump_command, fatal=True, quiet=True, timeout=default_sql_cmd_timeout) def restore_accounting_database(): @@ -2197,11 +2198,11 @@ def restore_accounting_database(): # If the sticky bit is set and the dump file is empty, remove the database. # Otherwise, restore the dump. - run_command(f"{base_command} -e \"drop database {database_name}\"", fatal=True, quiet=True) + run_command(f"{base_command} -e \"drop database {database_name}\"", fatal=True, quiet=False, timeout=default_sql_cmd_timeout) dump_stat = os.stat(sql_dump_file) if not (dump_stat.st_size == 0 and dump_stat.st_mode & stat.S_ISVTX): run_command(f"{base_command} -e \"create database {database_name}\"", fatal=True, quiet=True) - run_command(f"{base_command} {database_name} < {sql_dump_file}", fatal=True, quiet=True) + run_command(f"{base_command} {database_name} < {sql_dump_file}", fatal=True, quiet=True, timeout=default_sql_cmd_timeout) # In either case, remove the dump file run_command(f"rm -f {sql_dump_file}", fatal=True, quiet=True) From f4d3f7932e263c5e4a91f4604d449edeff8719b0 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 12 May 2023 17:29:06 +0200 Subject: [PATCH 31/81] Testsuite - Fix test5.9 for switch/hpe_slingshot Some --network parameters are rejected with switch/hpe_slingshot. 
Bug 16631 Signed-off-by: Tim McMullan --- testsuite/expect/test5.9 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testsuite/expect/test5.9 b/testsuite/expect/test5.9 index bf22ab40260..b3143fa5032 100755 --- a/testsuite/expect/test5.9 +++ b/testsuite/expect/test5.9 @@ -37,6 +37,11 @@ set file_err "$test_dir/error" set file_in "$test_dir/$job_script" set file_out "$test_dir/output" set test_acct "${test_name}_acct" +set network_param "ip" + +if {[param_contains [get_config_param "SwitchType"] "*slingshot"]} { + set network_param "depth" +} ############################Job Format Test############################ @@ -64,7 +69,7 @@ set sq_format(account) $test_acct # cpus-per-task set sq_format(cpuspertask) 2 # network -set sq_format(network) "ip" +set sq_format(network) $network_param # requeue set sq_format(requeue) 1 # profile From 7957579e700ecc209ac9e6be96be8349da2773c6 Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 7 Jun 2023 14:41:17 -0600 Subject: [PATCH 32/81] Testsuite - Update tox.ini to use xunit1 Update tox.ini to set the junit_family=xunit1 to maintain the legacy format of the junit.xml for pytest >= 6.1.0 Bug 16921 --- testsuite/python/tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/python/tox.ini b/testsuite/python/tox.ini index 8854569a52e..081de063c1c 100644 --- a/testsuite/python/tox.ini +++ b/testsuite/python/tox.ini @@ -4,3 +4,4 @@ log_format = [%(asctime)s.%(msecs)03d] %(levelname)-8s %(message)s (%(traceback)s) log_date_format = %Y-%m-%d %H:%M:%S log_level = DEBUG +junit_family=xunit1 From 235a7a8240f0de61981e82ad4b5fed812729c69f Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 7 Jun 2023 15:14:41 -0600 Subject: [PATCH 33/81] Testsuite - Add spaces to maintain style --- testsuite/python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/python/tox.ini b/testsuite/python/tox.ini index 081de063c1c..78a7d0d3d1f 100644 --- a/testsuite/python/tox.ini +++ b/testsuite/python/tox.ini @@ -4,4 +4,4 @@ log_format = [%(asctime)s.%(msecs)03d] %(levelname)-8s %(message)s (%(traceback)s) log_date_format = %Y-%m-%d %H:%M:%S log_level = DEBUG -junit_family=xunit1 +junit_family = xunit1 From 3940316f2855bc976f8e6a1e3ed898979160ec57 Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Tue, 13 Jun 2023 16:50:03 +0000 Subject: [PATCH 34/81] Testsuite - Add require_tool to atf --- testsuite/python/lib/atf.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py index 77546ff6130..bc8b87b46ae 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -876,6 +876,19 @@ def remove_config_parameter_value(name, value, source='slurm'): set_config_parameter(name, None, source=source) +def is_tool(tool): + """Returns True if the tool is found in PATH""" + from shutil import which + return which(tool) is not None + + +def require_tool(tool): + """Skips if the supplied tool is not found""" + if not is_tool(tool): + msg = f"This test requires '{tool}' and it was not found" + pytest.skip(msg, allow_module_level=True) + + def require_whereami(): """Compiles the whereami.c program to be used by tests From 05f1c537a325adce12dc0281831fc3cb544ea947 Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Wed, 14 Jun 2023 17:33:52 +0000 Subject: [PATCH 35/81] Testsuite - Update require_wherami with skip/cray --- testsuite/python/lib/atf.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/testsuite/python/lib/atf.py 
b/testsuite/python/lib/atf.py index bc8b87b46ae..d87e3c5a7fd 100644 --- a/testsuite/python/lib/atf.py +++ b/testsuite/python/lib/atf.py @@ -889,7 +889,7 @@ def require_tool(tool): pytest.skip(msg, allow_module_level=True) -def require_whereami(): +def require_whereami(is_cray=False): """Compiles the whereami.c program to be used by tests This function installs the whereami program. To get the @@ -906,8 +906,13 @@ def require_whereami(): >>> output = atf.run_command(f"srun {atf.properties['whereami']}", >>> user=atf.properties['slurm-user']) """ - require_config_parameter("TaskPlugin", - "task/cray_aries,task/cgroup,task/affinity") + require_config_parameter("TaskPlugin", "task/cgroup,task/affinity") + + # Set requirement for cray systems + if is_cray: + require_config_parameter("TaskPlugin", + "task/cray_aries,task/cgroup,task/affinity") + # If the file already exists and we don't need to recompile dest_file = f"{properties['testsuite_scripts_dir']}/whereami" if os.path.isfile(dest_file): @@ -916,7 +921,7 @@ def require_whereami(): source_file = f"{properties['testsuite_scripts_dir']}/whereami.c" if not os.path.isfile(source_file): - pytest.fail('Could not find whereami.c!') + pytest.skip("Could not find whereami.c!", allow_module_level=True) run_command(f"gcc {source_file} -o {dest_file}", fatal=True, user=properties['slurm-user']) From 0c3f81c20d3e533179b07c29d5e844f1ec235658 Mon Sep 17 00:00:00 2001 From: Nathan Rini Date: Tue, 10 Jan 2023 12:36:19 -0700 Subject: [PATCH 36/81] Testsuite - fix logging failure Not all logs provide "traceback" key causing pytest to fail even if the test has not failed. Bug 15765 --- testsuite/python/tox.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testsuite/python/tox.ini b/testsuite/python/tox.ini index 78a7d0d3d1f..d13c1ceabda 100644 --- a/testsuite/python/tox.ini +++ b/testsuite/python/tox.ini @@ -1,7 +1,9 @@ # Do not change this file as these are the slurm package defaults. # You may override these or other settings within a separate pytest.ini file. +# TJ - 20230620: Traceback in the log format was removed, original is below +# log_format = [%(asctime)s.%(msecs)03d] %(levelname)-8s %(message)s (%(traceback)s) [pytest] -log_format = [%(asctime)s.%(msecs)03d] %(levelname)-8s %(message)s (%(traceback)s) +log_format = [%(asctime)s.%(msecs)03d] %(levelname)-8s %(message)s log_date_format = %Y-%m-%d %H:%M:%S log_level = DEBUG junit_family = xunit1 From 297a3a8466624df5c4cc097babc2cb9f0d90bbf4 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Mon, 22 May 2023 09:51:00 -0600 Subject: [PATCH 37/81] Testsuite - skip test1.12 on select/linear Test uses --exclusive and --overlap, which behave differently on select/linear Bug 16787 --- testsuite/expect/test1.12 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/testsuite/expect/test1.12 b/testsuite/expect/test1.12 index 02cb010f6d2..9393171f34d 100755 --- a/testsuite/expect/test1.12 +++ b/testsuite/expect/test1.12 @@ -193,6 +193,9 @@ if {[set_nodes_and_threads_by_request "-N1"]} { skip "Test needs to be able to submit a job with -N1." 
}

+if {![check_config_select "cons_tres"] && ![check_config_select "cons_res"]} {
+	skip "Test requires SelectType=select/cons_tres or cons_res"
+}
 testproc test_overlap_after_overlap
 testproc test_overlap_after_exclusive
 testproc test_exclusive_after_overlap
From 3dbdfa56ae7fbd332c664ac584e8e7d7042198ba Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Mon, 22 May 2023 11:02:00 -0600
Subject: [PATCH 38/81] Testsuite - skip test1.14 on select/linear

Test uses limited node resources with --immediate to intentionally
fail. This doesn't work with select/linear, and can't exercise
--immediate the same way.

Bug 16790
---
 testsuite/expect/test1.14 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/testsuite/expect/test1.14 b/testsuite/expect/test1.14
index 4b01f785b75..40982aa509c 100755
--- a/testsuite/expect/test1.14
+++ b/testsuite/expect/test1.14
@@ -37,6 +37,10 @@ if {[get_config_param "FrontendName"] ne "MISSING"} {
 	skip "This test is incompatible with front-end systems"
 }

+if {[check_config_select "linear"]} {
+	skip "Test requires SelectType != linear"
+}
+
 proc cleanup {} {
 	global job_id file_in file_out test_dir

From 943370c7b0d1ac261e1d29942792985949535a99 Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Mon, 22 May 2023 12:04:33 -0600
Subject: [PATCH 39/81] Testsuite - skip test1.35 on select/linear

Test uses --mem-per-cpu on steps to ensure job --mem-per-cpu is
enforced. This doesn't work the same with select/linear.

Bug 16791
---
 testsuite/expect/test1.35 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/testsuite/expect/test1.35 b/testsuite/expect/test1.35
index 7815c39e506..d4ae0221775 100755
--- a/testsuite/expect/test1.35
+++ b/testsuite/expect/test1.35
@@ -38,6 +38,10 @@ set steps_started 30
 set job_mem_opt "--mem-per-cpu=256M"
 set step_mem_opt "--mem-per-cpu=6M"

+if {[check_config_select "linear"]} {
+	skip "Test requires SelectType!=select/linear"
+}
+
 proc cleanup {} {
 	global job_id

From 17b33586a02ea529b15b7c2af036cd611854bff7 Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Mon, 22 May 2023 15:29:14 -0600
Subject: [PATCH 40/81] Testsuite - skip test1.62 on select/linear

Test uses --gres, which behaves differently on select/linear.

Bug 16793
---
 testsuite/expect/test1.62 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/testsuite/expect/test1.62 b/testsuite/expect/test1.62
index c5c5d9e61f8..bca591adab7 100755
--- a/testsuite/expect/test1.62
+++ b/testsuite/expect/test1.62
@@ -30,6 +30,10 @@ source ./globals

 set file_in "$test_dir/job_script"

+if {[check_config_select "linear"]} {
+	skip "Test requires SelectType!=linear"
+}
+
 make_bash_script $file_in {echo CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES}

 proc run_gpu_test { gres_cnt } {
From 3d2ab7f006ac5754f338229c0d1e78e3b4621fcc Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Tue, 23 May 2023 10:30:40 -0600
Subject: [PATCH 41/81] Testsuite - skip test7.23 on select/linear

Test uses --mem-per-cpu on steps to experiment with LUA plugins. This
doesn't work the same with select/linear.

Bug 16803
---
 testsuite/expect/test7.23 | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/testsuite/expect/test7.23 b/testsuite/expect/test7.23
index 68294da3927..f1799b80480 100755
--- a/testsuite/expect/test7.23
+++ b/testsuite/expect/test7.23
@@ -32,6 +32,10 @@ if {![have_lua]} {
 	skip "LUA must be installed and enabled to test lua job_submit plugin."
}
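
The same select-plugin guard pattern carries over to the python testsuite; a
minimal sketch, assuming an atf.get_config_parameter() helper that returns the
configured SelectType (the helper name is an assumption; the skip message
mirrors the Tcl one above):

    import atf
    import pytest

    # Skip at module level when the select plugin cannot honor these requests
    if atf.get_config_parameter("SelectType") not in ("select/cons_tres",
                                                      "select/cons_res"):
        pytest.skip("Test requires SelectType=select/cons_tres or cons_res",
                    allow_module_level=True)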
+if {[check_config_select "linear"]} {
+	skip "Test requires SelectType!=select/linear"
+}
+
 proc test_min_mem { cmd min_mem_per_cpu_exp min_mem_per_node_exp } {
 	set output [run_command_output -fail "$cmd"]
 	set per_cpu_found ""
@@ -64,6 +68,7 @@ reconfigure -fail
 testproc test_min_mem "$salloc -J $job_name $bin_true" nil nil
 testproc test_min_mem "$srun -J $job_name $bin_true" nil nil
 testproc test_min_mem "$sbatch -J $job_name -o /dev/null --wrap $bin_true" nil nil
+
 testproc test_min_mem "$salloc -J $job_name --mem-per-cpu=1024 $bin_true" 1024 nil
 testproc test_min_mem "$srun -J $job_name --mem-per-cpu=1024 $bin_true" 1024 nil
 testproc test_min_mem "$sbatch -J $job_name --mem-per-cpu=1024 -o /dev/null --wrap $bin_true" 1024 nil
From b3b7a1c88a0a94b0d69ef749173675d4bf0bfb5e Mon Sep 17 00:00:00 2001
From: Albert Gil
Date: Thu, 13 Jul 2023 11:38:58 +0200
Subject: [PATCH 42/81] Testsuite - Fix test3.11 taking into account PLND

Bug 16731
---
 testsuite/expect/inc3.11.10 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/expect/inc3.11.10 b/testsuite/expect/inc3.11.10
index 16aa6900bd5..b947712056b 100644
--- a/testsuite/expect/inc3.11.10
+++ b/testsuite/expect/inc3.11.10
@@ -34,7 +34,7 @@ proc inc3_11_10 {} {
 	#
 	log_info "+++++ STARTING TEST 10 +++++"
 	# Make the reservation
-	set ret_code [create_res $res_name "StartTime=now+60minutes Duration=60 Nodes=[node_list_to_range [get_nodes_by_state idle,power_down,powering_down]] flags=DAILY user=$user_name"]
+	set ret_code [create_res $res_name "StartTime=now+60minutes Duration=60 Nodes=[node_list_to_range [get_nodes_by_state idle,power_down,powering_down,plnd]] flags=DAILY user=$user_name"]
 	if {$ret_code != 0} {
 		fail "Unable to create a valid reservation"
 	}
From f7401441d6d4c423abe4e37d5978d723f784448d Mon Sep 17 00:00:00 2001
From: Albert Gil
Date: Sun, 9 Jul 2023 11:42:55 +0200
Subject: [PATCH 43/81] Testsuite - Add get_reservations and
 get_reservation_parameter

Bug 16731
---
 testsuite/python/lib/atf.py | 69 +++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/testsuite/python/lib/atf.py b/testsuite/python/lib/atf.py
index d87e3c5a7fd..cf9b596817e 100644
--- a/testsuite/python/lib/atf.py
+++ b/testsuite/python/lib/atf.py
@@ -1376,6 +1376,75 @@ def set_node_parameter(node_name, new_parameter_name, new_parameter_value):
     if is_slurmctld_running(quiet=True):
         restart_slurm(quiet=True)

+def get_reservations(quiet=False, **run_command_kwargs):
+    """Returns the reservations as a dictionary of dictionaries.
+
+    Args:
+        quiet (boolean): If True, logging is performed at the TRACE log level.
+
+    Returns: A dictionary of dictionaries where the first level keys are the
+        reservation names and their values are dictionaries of
+        configuration parameters for the respective reservation.
+    """
+
+    resvs_dict = {}
+    resv_dict = {}
+
+    output = run_command_output("scontrol show reservations -o", fatal=True, quiet=quiet, **run_command_kwargs)
+    for line in output.splitlines():
+        if line == '':
+            continue
+
+        while match := re.search(r'^ *([^ =]+)=(.*?)(?= +[^ =]+=| *$)', line):
+            parameter_name, parameter_value = match.group(1), match.group(2)
+
+            # Remove the consumed parameter from the line
+            line = re.sub(r'^ *([^ =]+)=(.*?)(?= +[^ =]+=| *$)', '', line)
+
+            # Reformat the value if necessary
+            if is_integer(parameter_value):
+                parameter_value = int(parameter_value)
+            elif is_float(parameter_value):
+                parameter_value = float(parameter_value)
+            elif parameter_value == '(null)':
+                parameter_value = None
+
+            # Add it to the temporary resv dictionary
+            resv_dict[parameter_name] = parameter_value
+
+        # Add the resv dictionary to the resvs dictionary
+        resvs_dict[resv_dict['ReservationName']] = resv_dict
+
+        # Clear the resv dictionary for use by the next resv
+        resv_dict = {}
+
+    return resvs_dict
+
+
+def get_reservation_parameter(resv_name, parameter_name, default=None):
+    """Obtains the value for a reservation configuration parameter.
+
+    Args:
+        resv_name (string): The reservation name.
+        parameter_name (string): The parameter name.
+        default (string or None): This value is returned if the parameter
+            is not found.
+
+    Returns: The value of the specified reservation parameter, or the
+        default if not found.
+    """
+
+    resvs_dict = get_reservations()
+
+    if resv_name in resvs_dict:
+        resv_dict = resvs_dict[resv_name]
+    else:
+        pytest.fail(f"reservation ({resv_name}) was not found")
+
+    if parameter_name in resv_dict:
+        return resv_dict[parameter_name]
+    else:
+        return default

 def is_super_user():
     uid = os.getuid()
From 4d55acb502bc34ab51aa0b9f47e761173e36e516 Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Tue, 23 May 2023 12:49:39 -0600
Subject: [PATCH 44/81] Testsuite - skip test9.9 on select/linear

Test measures performance with a large number of jobs. Jobs can't be
scheduled on the same node with select/linear, so this makes the
performance of this test much more dependent on the actual
configuration.

Bug 16805
---
 testsuite/expect/test9.9 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/testsuite/expect/test9.9 b/testsuite/expect/test9.9
index b326eb950e9..fdcab095f87 100755
--- a/testsuite/expect/test9.9
+++ b/testsuite/expect/test9.9
@@ -33,6 +33,10 @@ if {[get_config_param "FrontendName"] ne "MISSING" || $enable_memory_leak_debug
 	set job_cnt 2
 }

+if {[check_config_select "linear"]} {
+	skip "Test requires SelectType != linear"
+}
+
 proc cleanup {} {
 	global scancel test_name

From 0ae50f48c50dc06a97535e928de1c76abf56c66f Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Thu, 25 May 2023 11:46:06 -0600
Subject: [PATCH 45/81] Testsuite - skip subtests in test21.21 on select/linear

Test originally checks for exclusive node allocation via oversubscription.
Extend these checks to also treat select/linear as implying exclusive
node allocation.

Bug 16828
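
A short usage sketch of the two helpers added above (the reservation name,
parameter, and default value are illustrative):

    import atf

    # All reservations, keyed by ReservationName
    resvs = atf.get_reservations()

    # One parameter from one reservation, with a fallback if it is absent
    state = atf.get_reservation_parameter("resv1", "State", default="INACTIVE")

Note that get_reservation_parameter() fails the test if the reservation itself
does not exist; the default only covers a missing parameter.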
---
 testsuite/expect/inc21.21_tests | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests
index 6d179f54a6a..84e78ac649d 100644
--- a/testsuite/expect/inc21.21_tests
+++ b/testsuite/expect/inc21.21_tests
@@ -41,7 +41,7 @@ proc inc21_21_good { test_type limit } {

 	log_info "====== Test $test_type ======"

-	if {($test_type eq "maxcpus" || $test_type eq "maxcpumins") && [default_part_exclusive] != 0} {
+	if {($test_type eq "maxcpus" || $test_type eq "maxcpumins") && ([default_part_exclusive] != 0 || [check_config_select "linear"])} {
 		log_warn "Unable to perform test with exclusive node allocations"
 		set is_skip 1
 		return
@@ -140,7 +140,7 @@ proc inc21_21_grp_test { test_type limit } {
 		return
 	}

-	if { [default_part_exclusive] != 0} {
+	if { [default_part_exclusive] != 0 || [check_config_select "linear"]} {
 		log_warn "This test can't be run with Exclusive node allocations"
 		set is_skip 1
 		return
From 4bd6ad0cddea7285410fa75c956dc7e3ec8b76f8 Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Fri, 26 May 2023 12:00:04 -0600
Subject: [PATCH 46/81] Testsuite - adjust resources for test21.35 on
 select/linear

Adjust resource limits (CPUs) to reflect the actual node setup instead
of minimal hard-coded limits when on select/linear.

Bug 16836
---
 testsuite/expect/test21.35 | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/testsuite/expect/test21.35 b/testsuite/expect/test21.35
index 7dd56cff578..0a5c076e98f 100755
--- a/testsuite/expect/test21.35
+++ b/testsuite/expect/test21.35
@@ -90,6 +90,19 @@ if {![is_super_user]} {
 	skip "Test can only be run as SlurmUser"
 }

+if {[check_config_select "linear"]} {
+	# Adjust the resources to reflect the first node
+	spawn $scontrol show nodes
+	expect {
+		-re {CPUTot=(\d+)} {
+			set max_tres_limit(cpu) $expect_out(1,string)
+			set max_tres_limit(billing) $expect_out(1,string)
+		}
+		timeout {
+			fail "'scontrol show nodes' not responding"
+		}
+	}
+}

 proc cleanup { } {
 	global sacctmgr test_acct test_qos
From e4a22b0b436b838a662dd8b3e2dd3413bbfc5ce8 Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Fri, 26 May 2023 16:05:35 -0600
Subject: [PATCH 47/81] Testsuite - skip subtest test_gpus_per_node in
 test38.18

test_gpus_per_node uses --gpus-per-node, which isn't compatible with
select/linear.

Bug 16844
---
 testsuite/expect/test38.18 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/expect/test38.18 b/testsuite/expect/test38.18
index 28872e09b0b..00347182cc4 100755
--- a/testsuite/expect/test38.18
+++ b/testsuite/expect/test38.18
@@ -486,9 +486,9 @@ proc test_mem_per_gpu {} {

 test_gpu_bind
 test_gpu_freq
-test_gpus_per_node

 if {[check_config_select "cons_tres"]} {
+	test_gpus_per_node
 	test_cpus_per_gpu
 	test_gpus_per_job
 	test_gpus_per_socket
From 40f46e51036c25cde9bd792d0fce5db9b17ecdec Mon Sep 17 00:00:00 2001
From: Ethan Simmons
Date: Mon, 29 May 2023 10:40:09 -0600
Subject: [PATCH 48/81] Testsuite - modify test39.21 to work with select/linear

Modify test39.21 to use manual node selection instead of --gres, which
doesn't work with select/linear, to ensure resources are present for
the test.
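
A rough python-suite analogue of the node-picking fallback used below would
scan the node table instead of relying on --gres; a hedged sketch, assuming
atf.get_nodes() returns per-node parameter dictionaries containing a Gres
entry (both of those are assumptions, not part of this patch):

    import atf

    get_gpu = "--gres=gpu:1"
    if atf.get_config_parameter("SelectType") == "select/linear":
        # --gres does not work with select/linear; target a GPU node directly
        for node, params in atf.get_nodes().items():
            if "gpu" in str(params.get("Gres") or ""):
                get_gpu = f"-w{node}"
                break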
Bug 16845 --- testsuite/expect/test39.21 | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/testsuite/expect/test39.21 b/testsuite/expect/test39.21 index 16cd0ed44e6..7d511de70e5 100755 --- a/testsuite/expect/test39.21 +++ b/testsuite/expect/test39.21 @@ -56,10 +56,27 @@ ls $nvcc $nvcc ${file_prog}.cu -o $file_prog ./$file_prog" +# +# If select type is linear, pick a node that has a gpu +# Else, use --gres (this doesn't work on select/linear) +# +set get_gpu "--gres=gpu:1" +if {[check_config_select "linear"]} { + set available_nodes [node_list_to_range [get_nodes_by_state]] + set gpu_dict [get_gres_count "gpu" $available_nodes] + dict for {node gpu_count} $gpu_dict { + # Find the first node that has gpu + if {$gpu_count >= 1} { + set get_gpu "-w$node" + break + } + } +} + # # Spawn a batch job to build and run CUDA job # -set job_id [submit_job -fail "--output=$file_out -N1 --gres=gpu:1 -t1 $file_in"] +set job_id [submit_job -fail "--output=$file_out -N1 $get_gpu -t1 $file_in"] # # Wait for job to complete and check for file From eb4ce8c2e7bd8b8a8f45b5cf6a0604c0c0f254a5 Mon Sep 17 00:00:00 2001 From: Jonathan de Gaston Date: Mon, 19 Jun 2023 22:31:10 +0000 Subject: [PATCH 49/81] Testsuite - update test_116_18.py Update test_116_18.py to work with slurm 23.11. Specify SelectType=select/linear in slurm.conf Bug 16860 --- testsuite/python/tests/test_116_18.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testsuite/python/tests/test_116_18.py b/testsuite/python/tests/test_116_18.py index 246014de1fd..0d83229f42e 100644 --- a/testsuite/python/tests/test_116_18.py +++ b/testsuite/python/tests/test_116_18.py @@ -9,6 +9,8 @@ # Setup @pytest.fixture(scope="module", autouse=True) def setup(): + atf.require_config_parameter("SelectType", "select/linear") + atf.require_config_parameter_excludes("SelectTypeParameters", "CR_Core") atf.require_config_parameter('FrontendName', None) atf.require_slurm_running() From 239d50c8017b40825223fa54e532d4de687bd4fb Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Mon, 29 May 2023 16:51:32 -0600 Subject: [PATCH 50/81] Testsuite - Rewrite run-tests in python Rewrite run-tests in python instead of perl to add quick-detection, state-save, and resume functionality. 
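
For reference, typical invocations of the rewritten runner, matching the
options defined in src/lib/cli.py below and the SLURM_CLUSTER convention
documented in the README (the cluster name is illustrative):

    ./run-tests                           # run the unit, expect, and python suites
    ./run-tests -i 'expect,1.*' -e 9.8    # include/exclude suites or test patterns
    ./run-tests -r                        # resume an interrupted run
    SLURM_CLUSTER=nightly ./run-tests -R  # reset the local db; results under results/nightly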
Bug 15918 --- testsuite/README | 7 +- testsuite/run-tests | 862 ++++++++---------- testsuite/src/lib/cli.py | 146 +++ testsuite/src/lib/db/test_db.py | 264 ++++++ .../lib/test_runners/regressions_runner.py | 182 ++++ testsuite/src/lib/test_runners/runner_ui.py | 79 ++ testsuite/src/lib/test_runners/unit_runner.py | 159 ++++ testsuite/src/lib/utils/cmds.py | 91 ++ testsuite/src/lib/utils/conf.py | 18 + testsuite/src/lib/utils/fs.py | 75 ++ testsuite/src/lib/utils/log.py | 34 + testsuite/src/lib/utils/ps.py | 30 + testsuite/src/lib/utils/test/test_list.py | 60 ++ testsuite/src/seed_data | 653 +++++++++++++ 14 files changed, 2170 insertions(+), 490 deletions(-) create mode 100644 testsuite/src/lib/cli.py create mode 100644 testsuite/src/lib/db/test_db.py create mode 100644 testsuite/src/lib/test_runners/regressions_runner.py create mode 100644 testsuite/src/lib/test_runners/runner_ui.py create mode 100644 testsuite/src/lib/test_runners/unit_runner.py create mode 100644 testsuite/src/lib/utils/cmds.py create mode 100644 testsuite/src/lib/utils/conf.py create mode 100644 testsuite/src/lib/utils/fs.py create mode 100644 testsuite/src/lib/utils/log.py create mode 100644 testsuite/src/lib/utils/ps.py create mode 100644 testsuite/src/lib/utils/test/test_list.py create mode 100644 testsuite/src/seed_data diff --git a/testsuite/README b/testsuite/README index d00f3c2614b..a3bcd53fdc9 100644 --- a/testsuite/README +++ b/testsuite/README @@ -27,8 +27,11 @@ To run the testsuites using the unified test-runner: Review and edit testsuite.conf as necessary. * pytest (pytest-3) must be installed in order to run the python testsuite. * expect must be installed in order to run the expect testsuite. -* If perldoc is installed, ./run-tests --man can be used to display a man page - for the test-runner. +* Set the SLURM_CLUSTER environment variable to set up a custom directory + in the 'results' dir that will hold unique logs and test data. + You can name this whatever you'd like, it will be created if needed. + This is a useful way to archive results from run-tests with things like + different dates, commits, or configs. * Execute run-tests with the desired options to run the testsuites. testsuite.conf: diff --git a/testsuite/run-tests b/testsuite/run-tests index dd1201f8412..96c0ee63eb7 100755 --- a/testsuite/run-tests +++ b/testsuite/run-tests @@ -1,491 +1,377 @@ -#!/usr/bin/perl -w +#!/usr/bin/env python3 ############################################################################ # Copyright (C) SchedMD LLC. 
############################################################################
-use strict;
-
-use Getopt::Long qw(:config no_ignore_case);
-use autouse 'Pod::Usage' => qw(pod2usage);
-use Cwd qw(cwd abs_path);
-use FindBin qw($Bin);
-use IPC::Cmd qw(can_run);
-use Term::ANSIColor;
-
-my ($help, $exit_on_first_failure, $exclude_pattern, $include_pattern, $man, $no_cleanup_on_failure);
-my $output_dir = "$Bin/log";
-my $quiet = 0;
-my $verbose = 0;
-
-GetOptions(
-	'help' => \$help,
-	'man' => \$man,
-	'exit-on-first-failure|x' => \$exit_on_first_failure,
-	'exclude|e=s' => \$exclude_pattern,
-	'include|i=s' => \$include_pattern,
-	'output-dir|o=s' => \$output_dir,
-	'no-cleanup-on-failure|n' => \$no_cleanup_on_failure,
-	'verbose|v+' => \$verbose,
-	'quiet|q+' => \$quiet,
-) or pod2usage(2);
-
-pod2usage(2) if $help;
-
-# Display usage if necessary
-if ($man) {
-	if ($< == 0) { # Cannot invoke perldoc as root
-		my $id = eval { getpwnam("nobody") };
-		$id = eval { getpwnam("nouser") } unless defined $id;
-		$id = -2 unless defined $id;
-		$< = $id;
-	}
-	$> = $<; # Disengage setuid
-	$ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH
-	delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
-	if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; } # Untaint $0
-	else { die "Illegal characters were found in \$0 ($0)\n"; }
-	pod2usage(-exitstatus => 0, -verbose => 2);
-}
-
-# Subtract quiet count from verbose count
-$verbose -= $quiet;
-
-my ($cmd, $output, $rc);
-my %summary = ('Passed' => 0, 'Failed' => 0, 'Errored' => 0, 'Skipped' => 0);
-my $exit_tests = 0;
-my @failed_tests = ();
-
-# Avoid using colors unless writing stdout to a terminal
-$ENV{ANSI_COLORS_DISABLED}++ unless -t STDOUT;
-
-# Derive default configuration parameters
-my %config = ();
-$config{slurmsourcedir} = abs_path("$Bin/..");
-$config{slurmbuilddir} = $config{slurmsourcedir};
-$config{slurminstalldir} = '/usr/local';
-$config{slurmconfigdir} = "$config{slurminstalldir}/etc";
-
-# Override configuration parameters with parameters from testsuite.conf
-my $testsuite_config_file = defined $ENV{SLURM_TESTSUITE_CONF} ? $ENV{SLURM_TESTSUITE_CONF} : "$Bin/testsuite.conf";
-open CONFIG_FILE, $testsuite_config_file or die "Unable to open testsuite.conf ($testsuite_config_file) for reading: $!. This file can be created from a copy of the autogenerated sample found in BUILDDIR/testsuite/testsuite.conf.sample. By default, this file is expected to be found in SRCDIR/testsuite ($Bin). If placed elsewhere, set the SLURM_TESTSUITE_CONF environment variable to the absolute path of your testsuite.conf file.\n";
-foreach my $line (<CONFIG_FILE>) {
-	if ($line =~ /^\s*(\S+)\s*=\s*(\S+)\s*$/) {
-		my ($parameter_name, $parameter_value) = (lc $1, $2);
-		$parameter_value =~ s/\$\{prefix}/$config{slurminstalldir}/;
-		$config{$parameter_name} = $parameter_value;
-	}
-}
-
-# Resolve and create test output directory if needed
-if (defined $output_dir) {
-	$output_dir = abs_path($output_dir);
-	if (!
-d $output_dir) { - mkdir $output_dir or die "Unable to create output directory $output_dir: $!\n"; - } -} - -# Warn if pytest is not installed -my $pytest_available = can_run('pytest-3'); -warn "Warning: The python tests require pytest to be installed in order to run.\n" unless $pytest_available; - -# Warn if expect is not installed -my $expect_available = can_run('expect'); -warn "Warning: The expect tests require expect (TCL) to be installed in order to run.\n" unless $expect_available; - -sub print_test_name { - my ($test_name) = @_; - - if ($verbose >= -1) { - printf "%s ", $test_name; - } -} - - -sub print_test_status { - my ($test_name, $test_status, $diagnostic_message, $test_output) = @_; - - # Update summary status totals - $summary{$test_status}++; - - # Print the test status - if ($test_status eq 'Passed') { - if ($verbose >= -1) { - print color('green'), uc $test_status, color('reset'), "\n"; - } - } - elsif ($test_status eq 'Skipped') { - if ($verbose == -1) { - # Overwrite the line - print "\b \b" x ((length $test_name) + 1); - } - elsif ($verbose >= 0) { - print color('yellow'), uc $test_status, color('reset'), "\n"; - } - } - else { - if ($verbose >= -1) { - print color('red'), uc $test_status, color('reset'), "\n"; - push @failed_tests, $test_name; - } - } - - # Print diagnostic messages if verbosity is sufficiently high - if (defined $diagnostic_message && - ((($test_status eq 'Failed' || $test_status eq 'Error') && - ($verbose == 0 || $verbose == 1)) || - ($test_status eq 'Skipped' && - ($verbose == 1 || $verbose == 2)))) { - chomp($diagnostic_message); - if ($test_status eq 'Skipped') { - print color('yellow'); - } - else { - print color('red'); - } - foreach my $line (split /\n/, $diagnostic_message) { - $line =~ s/^\s+//; - printf " $line\n"; - } - print color('reset'); - } - - # Print test output if verbosity is sufficiently high - if (defined $test_output && - ((($test_status eq 'Failed' || $test_status eq 'Error') && - $verbose >= 2) || - $verbose >= 3)) { - foreach my $line (split /\n/, $test_output) { - print "$line\n"; - } - } - - # Log output to file - log_output_file($test_name, $test_status, $test_output); - - # Trigger exit on first failure if relevant - if ($exit_on_first_failure && ($test_status eq 'Failed' || $test_status eq 'Error')) { - $exit_tests = 1; - } -} - - -sub log_output_file { - my ($test_name, $test_status, $test_output) = @_; - - return if ($output_dir eq '' || $output_dir eq '/dev/null'); - - my $output_file = "$test_name"; - $output_file =~ s|/|%|g; - $output_file .= '.log'; - if ($test_status eq 'Failed') { $output_file .= '.failed'; } - elsif ($test_status eq 'Error') { $output_file .= '.error'; } - elsif ($test_status eq 'Skipped') { $output_file .= '.skipped'; } - $output_file = "$output_dir/$output_file"; - - open OUTPUT_FILE, '>', "$output_file" or die "Unable to open $output_file for writing: $!\n"; - print OUTPUT_FILE $test_output; - close OUTPUT_FILE; -} - - -############################################################################## -# Run unit tests -############################################################################## - -sub run_unit_tests { - my ($test_base) = @_; - my $build_testsuite_dir = "$config{slurmbuilddir}/testsuite"; - - # Change to the specified test directory and get a list of test subdirectories - chdir("$build_testsuite_dir/$test_base") or die "Unable to change directory to $build_testsuite_dir/$test_base: $!\n"; - my $subdirs = `echo 'print: ; \@echo "\$(SUBDIRS)"' | make -f Makefile -f - print`; - 
chomp($subdirs); - - # Recursively iterate through all test subdirectories - foreach my $subdir (split / /, $subdirs) { - run_unit_tests("$test_base/$subdir"); - } - - # Return to the specified test directory and get a list of tests - chdir("$build_testsuite_dir/$test_base") or die "Unable to change directory to $build_testsuite_dir/$test_base: $!\n"; - my $tests = `echo 'print: ; \@echo "\$(TESTS)"' | make -f Makefile -f - print`; - chomp($tests); - - # Run make clean so that we get a fresh compile and test execution - `make clean`; - - # Iterate through all tests in the current test directory - foreach my $test (split /\s+/, $tests) { - my $test_name = "$test_base/$test"; - last if $exit_tests; - next if (defined $include_pattern && $test_name !~ /$include_pattern/); - next if (defined $exclude_pattern && $test_name =~ /$exclude_pattern/); - print_test_name($test_name); - - # Compile the unit test - my $compile_output = `make $test 2>&1`; - $rc = $? >> 8; - if ($rc) { - print_test_status($test_name, 'Error', 'Unable to compile test'); - next; - } - - # Execute the unit test using the test driver - # Set SUBDIRS to nothing so it doesnt descend into dirs without tests - my $test_output = `make check TESTS='${test}' SUBDIRS= 2>&1`; - $rc = $? >> 8; - - my $test_status = 'Passed'; - $test_status = 'Failed' if $rc; - - $test_output = $test_output . "\nTEST OUTPUT\n" . `cat ${test}.log`; - print_test_status($test_name, $test_status, '', $test_output); - } -} - -run_unit_tests('slurm_unit'); - - -############################################################################## -# Run expect tests -############################################################################## - -if ($expect_available) { - # Change directory to testsuite/expect - chdir("$Bin/expect") or die "Unable to change directory to $Bin/expect: $!\n"; - - # Obtain a list of expect tests to run - $output = `bash -c "ls test{?,??}\.{?,??,???} 2>/dev/null | sort -V" 2>&1`; - $rc = $? >> 8; - if ($rc) { - die "Unable to obtain list of expect tests to run: $output\n"; - } - my @expect_tests = split /\s+/, $output; - #use Data::Dumper; - #print Data::Dumper->Dump([\@expect_tests], ['expect_tests']), "\n"; - - # Iterate over each expect test - foreach my $test (@expect_tests) { - my $test_name = "expect/$test"; - - last if $exit_tests; - next if (defined $include_pattern && $test_name !~ /$include_pattern/); - next if (defined $exclude_pattern && $test_name =~ /$exclude_pattern/); - - # Initially print the test name so we can see what test is being run - print_test_name($test_name); - - # Run the test - my $test_output = `./$test 2>&1`; - $rc = $? 
>> 8;
-
-		my $test_status = '';
-		if ($rc == 0) { $test_status = 'Passed'; }
-		elsif ($rc > 127) { $test_status = 'Skipped'; }
-		else { $test_status = 'Failed'; }
-
-		# Parse the test output for diagnostic information
-		my $diagnostic_output = '';
-		foreach my $line (split /\n/, $test_output) {
-			if ($line =~ m%^\[[^\]]+\] (Fatal|Error)\s+(.*)$% || $line =~ m%^\[[^\]]+\] (Warning)\s+(.*skip\@globals.*)$%) {
-				my ($severity, $message) = ($1, $2);
-				$message =~ s/\([^\)]*\)//g; # Remove parenthesized inclusions
-				$message =~ s/\([^\)]*\)//g; # Remove singly nested layers
-				$message =~ s/\s{2,}/ /g; # Collapse whitespace
-				$message =~ s/\s+$//g; # Remove trailing whitespace
-				$diagnostic_output .= "$severity: $message\n";
-			}
-		}
-
-		print_test_status($test_name, $test_status, $diagnostic_output, $test_output);
-	}
-}
-
-
-##############################################################################
-# Run python tests
-##############################################################################
-
-if ($pytest_available) {
-	# Change directory to testsuite/python
-	chdir("$Bin/python") or die "Unable to change directory to $Bin/python: $!\n";
-
-	# Obtain a list of python tests to run via pytest
-	my @python_tests = ();
-	$output = `pytest-3 --collect-only -q tests 2>&1`;
-	$rc = $? >> 8;
-	if ($rc) {
-		die "Unable to obtain list of python tests to run: $output\n";
-	}
-
-	# Parse the collected test output
-	foreach my $line (split /\n/, $output) {
-		if ($line =~ m%^tests/.*test_\S+\.py::.*test_\S+$%) {
-			push @python_tests, $line;
-		}
-	}
-	#use Data::Dumper;
-	#print Data::Dumper->Dump([\@python_tests], ['python_tests']), "\n";
-
-	# Iterate over each collected test
-	foreach my $test (@python_tests) {
-		my $test_name = "python/$test";
-		last if $exit_tests;
-		next if (defined $include_pattern && $test_name !~ /$include_pattern/);
-		next if (defined $exclude_pattern && $test_name =~ /$exclude_pattern/);
-
-		# Initially print the test function name so we can see what test is being run
-		print_test_name($test_name);
-
-		# Run the test function
-		my $test_output = `pytest-3 -s -rA -v --junit-xml=junit.xml $test 2>&1`;
-		$rc = $? >> 8;
-
-		my $test_status;
-		#if ($rc == 0) { $test_status = 'Passed'; }
-		#elsif ($rc == 1) { $test_status = 'Failed'; }
-		#else { $test_status = 'Error'; }
-
-		# Parse the junit output for test function and status information
-		open JUNIT_XML, "junit.xml" or die "Unable to open junit.xml for reading: $!\n";
-		my $junit_output = do { local $/; <JUNIT_XML> };
-
-		$test_status = 'Passed';
-		my $test_message = '';
-
-		# Iterate over each test function
-		# Since we are now running function-centric, there will just be one
-		foreach my $testcase ($junit_output =~ m%<testcase ([^>]*?)/?>%sg) {
-			# All of the functions have already been run. Here we are just printing
-			# out the per-function statuses.
-			last if $exit_tests;
-
-			my $subtest_class_name = '';
-			my $subtest_file_name = '';
-			my $subtest_fqn = '';
-			my $subtest_function_name = '';
-			my $subtest_status = 'Passed';
-			my $subtest_message = '';
-
-			$subtest_file_name = $1 if $testcase =~ m%file="([^"]+)"%;
-			$subtest_class_name = $1 if $testcase =~ m%classname="([^"]+)"%;
-			$subtest_function_name = $1 if $testcase =~ m% name="([^"]+)"%;
-			if ($subtest_class_name ne '') { $subtest_class_name =~ s/.*\.//; }
-			if (index($subtest_file_name, $subtest_class_name) == -1) {
-				$subtest_fqn = "${subtest_file_name}::${subtest_class_name}::${subtest_function_name}";
-			} else {
-				$subtest_fqn = "${subtest_file_name}::${subtest_function_name}";
-			}
-			if ($testcase =~ /<
-[... span lost in extraction: the remainder of the junit status parsing, the summary reporting code, and the POD NAME/SYNOPSIS headings ...]
-
-B<run-tests> [B<-i, --include> I<pattern>] [B<-e, --exclude> I<pattern>] [B<-o, --output-dir> I<dir>] [B<-x, --exit-on-first-failure>] [B<-v, --verbose>]... [B<-q, --quiet>]... [B<-?, --help>] [B<--man>]
-
-=for comment
-[B<-n, --no-cleanup-on-failure>]
-
-=head1 DESCRIPTION
-
-B<run-tests> is used to run tests across multiple testsuites (e.g. unit-tests, expect tests, python tests). It runs the tests and displays summary results (in a format similar to TAP). Test output files are left in the testsuite directory in order to review failure causes.
-
-=head1 OPTIONS
-
-=over 4
-
-=item B<-i, --include> I<pattern>
-
-only tests matching the specified pattern will be run
-
-=item B<-e, --exclude> I<pattern>
-
-tests matching the specified pattern will not be run
-
-=item B<-o, --output-dir> I<dir>
-
-test output files will be written to the specified directory. The directory will be created if necessary. Output files are written to the testsuite/log directory by default.
-
-=item B<-x, --exit-on-first-failure>
-
-exit the testrun on the first test failure
-
-=for comment
-item B<-n, --no-cleanup-on-failure>
-does not teardown on failure
-
-=item B<-v, --verbose>...
-
-increase verbosity in test status and diagnostic information
-
-=item B<-q, --quiet>...
-
-reduce verbosity in test status and diagnostic information
-
-=item B<--help>
-
-brief help message
-
-=item B<--man>
-
-full documentation
-
-=back
-
-=head1 AUTHOR
-
-Scott Jackson, scottmo@schedmd.com
-
-=cut
+import os, re, sys
+
+# SchedMD
+sys.path.append(sys.path[0] + "/src/lib")
+from db.test_db import (
+    get_new_run_list,
+    reset_suite_run_ids,
+    setup_db_if_new,
+    update_records_from_dirs,
+)
+from cli import (
+    get_args,
+    test_parser,
+)
+from utils.cmds import (
+    perform,
+)
+from utils.conf import (
+    get_vars_from_conf,
+)
+from utils.fs import (
+    create_dir,
+    file_exists,
+    exit_if_not_exists,
+)
+from utils.log import (
+    log,
+    log_header,
+    log_new_line,
+    print_pretty_dict,
+)
+from utils.ps import (
+    is_tool,
+)
+from utils.test.test_list import (
+    get_unit_tl_from_build_dir,
+    get_tl_from_dir,
+)
+from test_runners.unit_runner import (
+    get_unit_run_stats,
+    run_unit_tests,
+)
+from test_runners.regressions_runner import (
+    get_regressions_run_stats,
+    run_regressions_tests,
+)
+
+# Set needed python version
+REQUIRED_PYTHON_VERSION = "3.8"
+
+
+def main():
+    req_ver = [int(x) for x in REQUIRED_PYTHON_VERSION.split(".")]
+    if sys.version_info[0] < req_ver[0] or sys.version_info[1] < req_ver[1]:
+        sys.exit(f"""
+*** PYTHON VERSION ERROR ***
+
+run-tests requires python3 version {REQUIRED_PYTHON_VERSION} or greater, got {sys.version}.
+ +- Exiting -""") + else: + perform( + "Executing ./run-tests", + run_app, + ) + + +def run_app(): + # Set defaults + APP_DIR = os.getcwd() + SRC_DIR = f"{APP_DIR}/src" + SEED_FILE = f"{SRC_DIR}/seed_data" + CLUSTER_DIR_PREFIX = "results" + CLUSTER_DEFAULT_NAME = "default-run" + TS_CONF_DEFAULT = f"{APP_DIR}/testsuite.conf" + + # Handle envars + test_env = os.environ.copy() + test_env.setdefault("SLURM_TESTSUITE_CONF", TS_CONF_DEFAULT) + if test_env["SLURM_TESTSUITE_CONF"] == TS_CONF_DEFAULT: + if not file_exists(TS_CONF_DEFAULT): + msg = f""" +*** CONFIG ERROR *** + +SLURM_TESTSUITE_CONF environment variable not set and the default testsuite.conf +file doesn't exist. + +Please set SLURM_TESTSUITE_CONF to the appropriate testsuite.conf or create a +testsuite.conf file in this directory as outlined in the README. + +- Exiting - """ + sys.exit(msg) + + # Retrieve slurm directory data from ts_conf + ts_conf = test_env["SLURM_TESTSUITE_CONF"] + ts_conf_vars = get_vars_from_conf(ts_conf) + slurm_src_dir = ts_conf_vars["SlurmSourceDir"] + slurm_build_dir = ts_conf_vars["SlurmBuildDir"] + slurm_install_dir = ts_conf_vars["SlurmInstallDir"] + slurm_config_dir = ts_conf_vars["SlurmConfigDir"] + + # Set cluster to default if not exists + test_env.setdefault("SLURM_CLUSTER", CLUSTER_DEFAULT_NAME) + my_cluster = test_env["SLURM_CLUSTER"] + + # Set dynamic dirs + CLUSTER_DIR = f"{APP_DIR}/{CLUSTER_DIR_PREFIX}/{my_cluster}" + TS_DIR = f"{slurm_src_dir}/testsuite" + UNIT_DIR = f"{slurm_build_dir}/testsuite/slurm_unit" + GLOBALS_LOCAL_DEFAULT = f"{TS_DIR}/expect/globals.local" + args = get_args() + + # Create directories for chosen my_cluster in ts_conf + create_dir(f"{CLUSTER_DIR}/src") + output_dir = args.output_dir or f"{CLUSTER_DIR}/log" + create_dir(output_dir) + db_name = f"{CLUSTER_DIR}/src/test_database.db" + + # Handle globals.local for expect suite + test_env.setdefault("SLURM_LOCAL_GLOBALS_FILE", GLOBALS_LOCAL_DEFAULT) + + # Process chosen tests from args + # TODO see if adding fails_first option is wanted + # fails_first = args.fails_first or False + fails_first = False + all_unit_tests = get_unit_tl_from_build_dir( + f"{slurm_build_dir}/testsuite/slurm_unit" + ) + + incl_dict = test_parser(args.include, all_unit_tests) + excl_dict = test_parser(args.exclude, all_unit_tests) + + # Exit early if programs for chosen tests aren't installed + if len(incl_dict["expect"]) > 0: + if not is_tool("expect"): + sys.exit( + "The expect tests require expect (TCL) to be installed in order to run." + ) + + if len(incl_dict["python"]) > 0: + if not is_tool("pytest-3"): + sys.exit( + "The python tests require pytest-3 to be installed in order to run." 
+ ) + + # Collect tests lists from the dirs (to be current with new or deleted tests) + all_expect_tests = get_tl_from_dir(f"{TS_DIR}/expect", "(test\d+\.\d+)$", "expect/") + all_python_tests = get_tl_from_dir( + f"{TS_DIR}/python/tests", "(test_\d+_\d+\.py)$", "python/tests/" + ) + + unit_dir_data = create_new_test_data(all_unit_tests, "slurm_unit") + expect_dir_data = create_new_test_data(all_expect_tests, "expect") + python_dir_data = create_new_test_data(all_python_tests, "python") + + # Setup or update the local db with all test data + start_fresh = args.reset + log_header("UPDATING RECORDS") + log(f"DB: {db_name}") + perform( + f"Setting up local test database", + setup_db_if_new, + db_name, + SEED_FILE, + start_fresh, + ) + perform( + "Updating records based on test dirs", + update_records_from_dirs, + db_name, + [unit_dir_data, expect_dir_data, python_dir_data], + ) + + # Log configuration + log_header("SETTINGS: ENV VARS") + log(f"SLURM_TESTSUITE_CONF={test_env['SLURM_TESTSUITE_CONF']}") + log(f"SLURM_LOCAL_GLOBALS_FILE={test_env['SLURM_LOCAL_GLOBALS_FILE']}") + log(f"SLURM_CLUSTER={test_env['SLURM_CLUSTER']}") + + log_header("SLURM_TESTSUITE_CONF VARS") + log(f"SlurmSourceDir = {slurm_src_dir}") + log(f"SlurmBuildDir = {slurm_build_dir}") + log(f"SlurmInstallDir = {slurm_install_dir}") + log(f"SlurmConfigDir = {slurm_config_dir}") + + # Log user include / exclude options + # log_new_line(f"Included tests: = ") + # print_pretty_dict(incl_dict) + + # log(f"Excluded tests: = ") + # print_pretty_dict(excl_dict) + + # Format user choices to valid lists + incl_dict = filter_tests( + incl_dict, all_unit_tests, all_expect_tests, all_python_tests + ) + excl_dict = filter_tests( + excl_dict, all_unit_tests, all_expect_tests, all_python_tests + ) + + # Begin Testing + log_header("**** BEGIN TESTING ****") + if not args.resume: + perform( + "Reseting run_ids", + reset_suite_run_ids, + db_name, + ["slurm_unit", "expect", "python"], + verbose=False + ) + + + # Run unit tests + aborted = False + run_unit_list = list(set(incl_dict["slurm_unit"]) - set(excl_dict["slurm_unit"])) + skip_unit = len(run_unit_list) < 1 + sorted_unit_data = ( + [] + if skip_unit + else (get_new_run_list(db_name, ["slurm_unit"], run_unit_list, fails_first)) + ) + + if not skip_unit: + perform( + "Running unit tests", + run_unit_tests, + db_name, + sorted_unit_data, + UNIT_DIR, + output_dir, + args.resume, + new_line=True, + ) + + unit_stats = get_unit_run_stats() + unit_status = unit_stats["status"] + if unit_status == "ABORTED": + aborted = True + + # Run regressions tests + run_regressions_list = list( + set(incl_dict["expect"]) - set(excl_dict["expect"]) + ) + list(set(incl_dict["python"]) - set(excl_dict["python"])) + + # If in resume mode start from resume idx using the last ran data + reg_start_idx = 0 + skip_reg = len(run_regressions_list) < 1 + sorted_reg_data = ( + [] + if skip_reg + else get_new_run_list( + db_name, ["expect", "python"], run_regressions_list, fails_first + ) + ) + + if not aborted: + if not skip_reg: + perform( + "Running regressions tests", + run_regressions_tests, + db_name, + sorted_reg_data, + TS_DIR, + output_dir, + test_env, + args.resume, + new_line=True, + ) + + regressions_stats = get_regressions_run_stats() + regressions_status = regressions_stats["status"] + if regressions_status == "ABORTED": + aborted = True + + + # Print summary data + print_run_summary( + [ + ("Unit Test Summary", get_unit_run_stats()), + ("Regressions Summary", get_regressions_run_stats()), + ] + ) + + 
print(f"Logs written to:\n\n{output_dir}\n") + if aborted: + msg = """Run ended early, to resume this run next time use the '-r' option +* Note: running without '-r' next time will reset all 'resume' history + """ + print(msg) + os.chdir(APP_DIR) + + +def print_run_summary(run_tup_list): + log_header("RESULTS") + frmt = "{:<20s}{:>10s}" + + for run_tup in run_tup_list: + msg, stats_dict = run_tup + if len(stats_dict.items()) > 0: + completions = stats_dict["completions"] + passes = stats_dict["passes"] + skips = stats_dict["skips"] + fails = stats_dict["fails"] + num_fails = len(fails) + num_tests = stats_dict["total"] + status = stats_dict["status"] + + if completions + skips + num_fails > 0: + print_frmt(frmt, f"{msg}:", "") + print_frmt(frmt, "Completions:", f"{completions}/{num_tests}") + print_frmt(frmt, "Passed:", passes) + print_frmt(frmt, "Failed:", num_fails) + print_frmt(frmt, "Skipped:", skips) + print_frmt(frmt, "Failed tests:", "") + print(", ".join(fails)) + print(f"{('-' * 20)}\n") + + +def print_frmt(text_format, msg1, msg2): + print(text_format.format(f"{msg1}", f"{msg2}")) + + +def create_new_test_data(test_name_list, suite, duration=10000, status="", run_id=0): + result = [] + + for test_name in test_name_list: + result.append((test_name, suite, duration, status, run_id)) + + return result + + +def filter_tests(t_dict, all_unit_tests, all_expect_tests, all_python_tests): + for k, v in t_dict.items(): + if k == "slurm_unit": + if len(v) > 0: + if v[0] == "all": + t_dict[k] = all_unit_tests + else: + t_dict[k] = intersect(v, all_unit_tests) + + elif k == "expect": + if len(v) > 0: + if v[0] == "all": + t_dict[k] = all_expect_tests + else: + t_dict[k] = intersect(v, all_expect_tests) + + else: + if len(v) > 0: + if v[0] == "all": + t_dict[k] = all_python_tests + else: + t_dict[k] = intersect(v, all_python_tests) + + return t_dict + + +def intersect(user_list, all_list): + # Handle *. or .* and *_ or _* options + result = [] + for test_name in user_list: + test_name = test_name.replace(".", "\.").replace("*", "\d+") + name_re = re.compile(f"{test_name}$") + + for valid_name in all_list: + if name_re.search(valid_name): + result.append(valid_name) + + return list(set(result)) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/testsuite/src/lib/cli.py b/testsuite/src/lib/cli.py new file mode 100644 index 00000000000..d99608e74e1 --- /dev/null +++ b/testsuite/src/lib/cli.py @@ -0,0 +1,146 @@ +############################################################################ +# Copyright (C) SchedMD LLC. 
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import argparse, re
+from collections import OrderedDict
+
+
+def get_args(argv=None):
+    parser = argparse.ArgumentParser(prog="app", description="host", epilog="")
+
+    # TODO add number of recursions, continue, prompt before start
+
+    parser.add_argument(
+        "-i",
+        "--include",
+        dest="include",
+        default="unit, expect, python",
+        type=str,
+        help="include these tests",
+    )
+
+    parser.add_argument(
+        "-e",
+        "--exclude",
+        dest="exclude",
+        default="",
+        type=str,
+        help="exclude these tests",
+    )
+
+    parser.add_argument(
+        "-o",
+        "--output",
+        dest="output_dir",
+        type=str,
+        help="where you'd like the logs stored (supply absolute path)"
+    )
+
+    parser.add_argument(
+        "-r",
+        "--resume",
+        dest="resume",
+        default=False,
+        action="store_true",
+        help="resume from the last run",
+    )
+
+    parser.add_argument(
+        "-R",
+        "--reset",
+        dest="reset",
+        default=False,
+        action="store_true",
+        help="reset the local db",
+    )
+
+    return parser.parse_args()
+
+
+def test_parser(value, unit_tests_list):
+    """Function for parsing suite / test lists from args
+
+    Takes a string of suites and / or tests and returns a unique list
+    to be used later.
+
+    If a whole suite is chosen (i.e. 'expect') and individual tests within
+    that suite are chosen as well, the whole suite wins out rather than doing the
+    suite (with that test in it) and that test again in isolation.
+
+    Some examples of acceptable options are:
+
+    '1.5'
+    'test9.8'
+    '2.6 test3.1 14.2'
+    '3.4,6.7,8.3'
+    '1.*'
+    '*.2'
+    '1.*,3.8,9.2'
+    'expect,unit,python,1.1,111_1,1.*' -> 'expect,unit,python'
+    """
+
+    result = OrderedDict()
+    result["slurm_unit"] = []
+    result["expect"] = []
+    result["python"] = []
+
+    # Remove commas
+    value = value.replace(",", " ")
+
+    # Split the suites (if any) and remove from value string
+    suite_re = re.compile(r"\bunit\b | \bexpect\b | \bpython\b", flags=re.I | re.X)
+    chosen_suites = []
+    if suites := list(set(suite_re.findall(value))):
+        for suite in suites:
+            out_val = suite
+
+            # Rename unit suite to slurm_unit to use later
+            # (so it matches the dir structure)
+            if suite == "unit":
+                out_val = "slurm_unit"
+                result["slurm_unit"] = []
+
+            result[out_val].append("all")
+            chosen_suites.append(suite)
+            value = value.replace(suite, "").strip()
+
+    # Split the user's option string into a series of tuples that represent
+    # each test, and add each tuple to the destination array.
+ if len(value) > 0: + test_re = re.compile("(?=test)?(\d+|\*)(\.|_)(\d+|\*).*$") + splitter = re.compile("[,\s]+") + val = splitter.split(value) + + for v in val: + matches = test_re.findall(v) + + if len(matches) > 0: + m = matches[0] + + # expect tests: + if m[1] == ".": + if not "expect" in chosen_suites: + result["expect"].append(f"expect/test{''.join(m)}") + + # python tests: + if m[1] == "_": + if not "python" in chosen_suites: + result["python"].append(f"python/tests/test_{''.join(m)}.py") + + value = value.replace(v, "") + + # Handle individual unit tests that may exists + if len(value) > 0 and not "slurm_unit" in chosen_suites: + val = splitter.split(value) + + for unit_test_name in val: + if len(unit_test_name) > 0: + for unit_test_path in unit_tests_list: + if unit_test_name in unit_test_path: + result["slurm_unit"].append(unit_test_path) + + # Remove duplicates + for k, v in result.items(): + result[k] = list(set(v)) + + return result diff --git a/testsuite/src/lib/db/test_db.py b/testsuite/src/lib/db/test_db.py new file mode 100644 index 00000000000..5b572040388 --- /dev/null +++ b/testsuite/src/lib/db/test_db.py @@ -0,0 +1,264 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import os, sqlite3 as db + +# SchedMD +from utils.fs import ( + delete_file, + file_exists, +) +from utils.log import ( + log, +) + +TESTS_TABLE = "tests" +db_name = "" + + +# Debug tips: +# In the run-tests dir run .run-tests then +# sqlite3 src/test_database.db +# then SELECT * FROM tests; to see results + + +def get_connection(db_name): + conn = db.connect(db_name) + return conn + + +def execute_query(db_name, sql, values=None): + conn = get_connection(db_name) + if values: + cur = conn.execute(sql, values) + else: + cur = conn.execute(sql) + + data = cur.fetchall() + conn.commit() + conn.close() + return data or None + + +def execute_many(db_name, sql, values=None): + conn = get_connection(db_name) + conn.executemany(sql, values) + conn.commit() + conn.close() + + +def create_new_db(db_name, seed_data): + """Creates a new test database to info for sorting + + Name paths are relative to the main app script + (that lives in the relative testsuite dir) + + seed_data = [ + ('expect/test1.1', 'expect-23.11', 1.06, 'PASSED'), + ('python/tests/test_111_1.py', 'python-23.11', 2.5, 'FAILED'), + ('slurm_unit/common/log-test.c', 'slurm-unit', 0, 'SKIPPED'), + etc + ] + """ + + delete_file(db_name) + + sql = f""" + CREATE TABLE {TESTS_TABLE} ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + name TEXT, + test_suite TEXT, + duration REAL, + status TEXT, + run_id INTEGER + ); + """ + + execute_query(db_name, sql) + insert_or_update_many(db_name, seed_data) + + +def create_seed_data_from_file(seed_file): + seed_data = [] + + with open(seed_file) as f: + for line in f: + if line[0] != "#": + # Expected tuple format: + # (name, test_suite, duration, status) + li = list(line.split(",")) + li[3] = "" # Set status to "" + # Append a base run_id=0 as last field (its not exported in seed data) + li.append(0) + seed_data.append(tuple(li)) + + return seed_data + + +def list_to_quote_str(lst): + return ",".join(["'{}'".format(val) for val in lst]) + + +def get_sorted_test_list( + db_name, + suite_list, + test_status_list=["", "FAILED", "PASSED", "SKIPPED"], + name_list=[], +): + """Retrieve an ordered test list based on duration and status (optional) + + Useful to grab all the FAILED 
test sorted first as a list and then + append the others (with failed ommitted) in order to run the fails first + """ + suite_vals = list_to_quote_str(suite_list) + name_vals = list_to_quote_str(name_list) + status_vals = list_to_quote_str(test_status_list) + + # If you want specific tests (using the -i option from cli.py) + name_vals = list_to_quote_str(name_list) + cond_test_query = f""" + AND name IN ({name_vals}) + """ + do_tests = cond_test_query if len(name_list) > 0 else "" + + sql = f""" + SELECT * from {TESTS_TABLE} + WHERE test_suite IN ({suite_vals}) + {do_tests} + AND status IN ({status_vals}) + ORDER BY duration + """ + + data = execute_query(db_name, sql) + return data + + +def reset_suite_run_ids(db_name, suite_list): + suite_vals = list_to_quote_str(suite_list) + sql = f""" + UPDATE {TESTS_TABLE} + SET run_id=0 + WHERE run_id=1 AND test_suite IN ({suite_vals}) + """ + execute_query(db_name, sql) + + +def get_sorted_FAILED_list(db_name, suite_list, name_list=[]): + return get_sorted_test_list(db_name, suite_list, ["FAILED"], name_list=name_list) + + +def get_sorted_not_FAILED_list(db_name, suite_list, name_list=[]): + return get_sorted_test_list( + db_name, suite_list, ["", "PASSED", "SKIPPED"], name_list=name_list + ) + + +def get_new_run_list(db_name, suite_list, name_list=[], fails_first=False): + if fails_first: + fails = ( + get_sorted_test_list(db_name, suite_list, ["FAILED"], name_list=name_list) + or [] + ) + + other = ( + get_sorted_test_list( + db_name, suite_list, ["", "PASSED", "SKIPPED"], name_list=name_list + ) + or [] + ) + result = fails + other + + else: + result = get_sorted_test_list(db_name, suite_list, name_list=name_list) or [] + + return result + + +def get_id(db_name, name, test_suite): + sql = f""" + SELECT id FROM {TESTS_TABLE} + WHERE name='{name}' AND test_suite='{test_suite}' + """ + + result = execute_query(db_name, sql) + data = result[0][0] if result else None + return data + + +def insert_or_update_row(db_name, test_row_tup, verbose=False): + name, test_suite, duration, status, run_id = test_row_tup + up_sql = "" + in_sql = "" + + if test_id := get_id(db_name, name, test_suite): + up_sql = f""" + UPDATE {TESTS_TABLE} + SET name=?, test_suite=?, duration=?, status=?, run_id=? + WHERE id=? + """ + return (up_sql, (name, test_suite, duration, status, run_id, test_id), "", None) + else: + in_sql = f""" + INSERT INTO {TESTS_TABLE} (name, test_suite, duration, status, run_id) + VALUES(?,?,?,?,?) + """ + return ("", None, in_sql, test_row_tup) + + if verbose: + print(up_sql) + + +def insert_or_update_many(db_name, test_data): + insert_sql = "" + update_sql = "" + insert_val_list = [] + update_val_list = [] + + for test_row_tup in test_data: + up_sql, up_vals, in_sql, in_vals = insert_or_update_row(db_name, test_row_tup) + update_sql = up_sql + insert_sql = in_sql + if in_vals: + insert_val_list.append(in_vals) + if up_vals: + update_val_list.append(up_vals) + + if len(insert_sql) > 0: + execute_many(db_name, insert_sql, insert_val_list) + + if len(update_sql) > 0: + execute_many(db_name, update_sql, update_val_list) + + +def insert_if_new_many(db_name, test_data): + insert_val_list = [] + sql = f""" + INSERT INTO {TESTS_TABLE} (name, test_suite, duration, status, run_id) + VALUES(?,?,?,?,?) 
+ """ + + for test_row_tup in test_data: + name, test_suite, duration, status, run_id = test_row_tup + test_id = get_id(db_name, name, test_suite) + + # Add values to insert if it doesn't exist + if not test_id: + insert_val_list.append(test_row_tup) + + execute_many(db_name, sql, insert_val_list) + + +def setup_db_if_new(_db_name, SEED_FILE, create_fresh_db=False): + global db_name + db_name = _db_name + + if not file_exists(db_name) or create_fresh_db: + log("--Creating new db--") + seed_data = create_seed_data_from_file(SEED_FILE) + create_new_db(db_name, seed_data) + + +def update_records_from_dirs(db_name, dir_test_data_list): + # Update the db for new dir data each time + for dir_test_data in dir_test_data_list: + insert_if_new_many(db_name, dir_test_data) diff --git a/testsuite/src/lib/test_runners/regressions_runner.py b/testsuite/src/lib/test_runners/regressions_runner.py new file mode 100644 index 00000000000..e28d70b31f7 --- /dev/null +++ b/testsuite/src/lib/test_runners/regressions_runner.py @@ -0,0 +1,182 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import os, re +from time import perf_counter + +# SchedMD +from db.test_db import ( + insert_or_update_many, +) +from test_runners.runner_ui import ( + color_state, + print_status_line, + print_test_line, +) +from utils.log import ( + log_new_line, +) +from utils.cmds import ( + perform, + run_cmd, + run_cmd_or_exit, +) +from utils.fs import ( + write_str_to_new_file, +) + +stats_dict = {} + + +def run_regressions_tests( + db_name, + test_data_list, + APP_DIR, + LOG_DIR, + test_env, + resume=False +): + global stats_dict + + total_tests = len(test_data_list) + result_data_list = [] + python_status_re = re.compile("=+\\n(PASSED|FAILED|SKIPPED|ERROR)") + msg = "Updating regressions test records" + fails = [] + + stats_dict = { + "completions": 0, + "passes": 0, + "skips": 0, + "fails": [], + "total": total_tests, + "status": "", + } + + # Handle already ran tests if resume mode is on + if resume: + stats_dict, test_data_list, result_data_list = filter_resume_data( + stats_dict, + test_data_list, + result_data_list) + print(f"~ Resuming from {len(result_data_list)} previously ran tests ~") + + # Prepare environment + expect_suite_dir = "expect/" + expect_dir = f"{APP_DIR}/{expect_suite_dir}" + + python_suite_dir = "python/tests/" + python_dir = f"{APP_DIR}/{python_suite_dir}" + + cur_dir = os.getcwd() + + for test_data in test_data_list: + _id, name, test_suite, duration, status, run_id = list(test_data) + test_dir, rel_name = name.rsplit("/", 1) + + if test_suite == "expect": + cmd = f"expect {rel_name} 2>&1" + suite_dir = expect_suite_dir + my_dir = expect_dir + else: + cmd = f"pytest-3 -s -rA -v {rel_name} 2>&1" + cmd = f"bash -c '{cmd}'" + suite_dir = python_suite_dir + my_dir = python_dir + + # Run the test + if cur_dir != my_dir: + cur_dir = my_dir + os.chdir(my_dir) + + start = perf_counter() + print_status_line( + f"{suite_dir}{rel_name}", stats_dict["completions"], stats_dict["total"] + ) + + try: + output = run_cmd( + cmd, + env=test_env, + quiet=True, + print_output=False, + ) + except KeyboardInterrupt: + stats_dict["status"] = "ABORTED" + break + + # Gather stats + end = perf_counter() + duration = round(end - start, 2) + status = get_test_status(test_suite, output, python_status_re) + run_id = 1 + + # Print test result + print_test_line(f"{suite_dir}{rel_name}", duration, 
status) + my_tup = (name, test_suite, duration, status, run_id) + + # Update stats + stats_dict["completions"] += 1 + + if status == "FAILED": + fails.append(my_tup) + stats_dict["fails"].append(rel_name) + filepath = f"{LOG_DIR}/{rel_name}.log.failed" + write_str_to_new_file(f"{output.stdout}\n{output.stderr}", filepath) + elif status == "SKIPPED": + stats_dict["skips"] += 1 + filepath = f"{LOG_DIR}/{rel_name}.log.skipped" + write_str_to_new_file(output.stdout, filepath) + else: + stats_dict["passes"] += 1 + + result_data_list.append((name, test_suite, duration, status, run_id)) + + # Prepare result if finished a complete run + # (Mostly for updating the db with new durations) + num_results = len(result_data_list) + if num_results == total_tests: + stats_dict["status"] = "COMPLETED" + + # Update the db only on a fail (with fail_fast) or complete run + if num_results: + perform(msg, insert_or_update_many, db_name, result_data_list, verbose=False) + + +def filter_resume_data(stats_dict, test_data_list, result_data_list): + new_test_data_list = [] + for test_data in test_data_list: + _id, name, test_suite, duration, status, run_id = list(test_data) + test_dir, rel_name = name.rsplit("/", 1) + + # run_id = 0 -> fresh, run_id = 1 -> ran last time + if run_id > 0: + stats_dict["completions"] += 1 + + if status == "FAILED": + stats_dict["fails"].append(rel_name) + elif status == "SKIPPED": + stats_dict["skips"] += 1 + else: + stats_dict["passes"] += 1 + + result_data_list.append((name, test_suite, duration, status, run_id)) + else: + new_test_data_list.append(test_data) + return (stats_dict, new_test_data_list, result_data_list) + + +def get_regressions_run_stats(): + return stats_dict + + +def get_test_status(test_suite, output, status_re=""): + status = "PASSED" + if test_suite == "expect": + if output.returncode != 0: + status = "SKIPPED" if output.returncode > 127 else "FAILED" + else: + result = status_re.findall(output.stdout)[0] + status = "FAILED" if result == "ERROR" else result + + return status diff --git a/testsuite/src/lib/test_runners/runner_ui.py b/testsuite/src/lib/test_runners/runner_ui.py new file mode 100644 index 00000000000..f8788fafbd3 --- /dev/null +++ b/testsuite/src/lib/test_runners/runner_ui.py @@ -0,0 +1,79 @@ +############################################################################ +# Copyright (C) SchedMD LLC. 
+############################################################################
+import sys
+from math import floor
+
+COL1 = 48
+COL2 = 14
+COL3 = 20
+
+TEST_TXT_FMT = f"{{:<{COL1}s}}{{:>{COL2}s}}{{:<{COL3}s}}"
+
+
+class Colors:
+    # 8
+    BLK = "\u001b[30m"
+    RED = "\u001b[31m"
+    GREEN = "\u001b[32m"
+    YELLOW = "\u001b[33m"
+    BLUE = "\u001b[34m"
+    MAGENTA = "\u001b[35m"
+    CYAN = "\u001b[36m"
+    WHITE = "\u001b[37m"
+    RESET = "\u001b[0m"
+
+
+CLR_PASS = Colors.CYAN
+CLR_FAIL = Colors.RED
+CLR_SKIP = Colors.BLUE
+CLR_WARN = Colors.YELLOW  # used by color_state() for WARNING/WARN states
+CLR_RESET = Colors.RESET
+
+
+def color_state(state, msg):
+    clr = ""
+
+    if state == "FAILED" or state == "FAIL":
+        clr = CLR_FAIL
+
+    if state == "PASSED" or state == "PASS":
+        clr = CLR_PASS
+
+    if state == "SKIPPED" or state == "SKIP":
+        clr = CLR_SKIP
+
+    if state == "WARNING" or state == "WARN":
+        clr = CLR_WARN
+
+    return f"{clr}{msg}{CLR_RESET}"
+
+
+def print_test_line(name, duration, status):
+    clr = CLR_FAIL
+
+    if status == "PASSED":
+        clr = CLR_PASS
+    elif status == "SKIPPED":
+        clr = CLR_SKIP
+
+    print(
+        TEST_TXT_FMT.format(
+            f"{clr}{name}",
+            f"{duration} sec ",
+            f"{status}{CLR_RESET}",
+        )
+    )
+
+
+def print_status_line(
+    msg, tests_complete, tests_total, prefix="> Running ", suffix="..."
+):
+    col1 = 52
+    col2 = 5
+    col3 = 10
+
+    STATUS_TXT_FMT = f"{{:<{col1}s}}{{:>{col2}s}}{{:<{col3}s}}"
+    perc = f"{floor((tests_complete / tests_total) * 100)}% "
+    info = f"[{tests_complete}/{tests_total}]"
+
+    print(STATUS_TXT_FMT.format(f"{prefix}{msg}{suffix}", perc, info), end="\r")
diff --git a/testsuite/src/lib/test_runners/unit_runner.py b/testsuite/src/lib/test_runners/unit_runner.py
new file mode 100644
index 00000000000..4dd758dc589
--- /dev/null
+++ b/testsuite/src/lib/test_runners/unit_runner.py
@@ -0,0 +1,159 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import os, re
+from time import perf_counter
+
+# SchedMD
+from db.test_db import (
+    insert_or_update_many,
+)
+from test_runners.runner_ui import (
+    color_state,
+    print_status_line,
+    print_test_line,
+)
+from utils.log import (
+    log_new_line,
+)
+from utils.cmds import (
+    perform,
+    run_cmd,
+    run_cmd_or_exit,
+)
+from utils.fs import (
+    write_str_to_new_file,
+)
+
+stats_dict = {}
+
+
+def run_unit_tests(db_name, test_data_list, UNIT_DIR, LOG_DIR, resume=False):
+    global stats_dict
+
+    total_tests = len(test_data_list)
+    result_data_list = []
+    fail_name_re = re.compile("(slurm_unit.*)")
+    msg = "Updating unit test records"
+    fails = []
+
+    stats_dict = {
+        "completions": 0,
+        "passes": 0,
+        "skips": 0,
+        "fails": [],
+        "total": len(test_data_list),
+        "status": "",
+    }
+
+    # Handle already-run tests if resume mode is on
+    if resume:
+        stats_dict, test_data_list, result_data_list = filter_resume_data(
+            stats_dict,
+            test_data_list,
+            result_data_list)
+        print(f"~ Resuming from {len(result_data_list)} previously run tests ~")
+
+    # Change to slurm_build_dir/testsuite/slurm_unit & run make
+    os.chdir(UNIT_DIR)
+    run_cmd("make -j clean", quiet=True)
+    make_output = run_cmd_or_exit(
+        "make -j", "ERROR: unable to perform make", quiet=True
+    )
+
+    for test_data in test_data_list:
+        _id, name, test_suite, duration, status, run_id = list(test_data)
+        test_dir, rel_name = name.rsplit("/", 1)
+        rel_name = rel_name.rsplit(".", 1)[0]  # Remove '.c' extension
+
+        # Run the test
+        start = perf_counter()
+        os.chdir(test_dir)
+        print_status_line(rel_name, stats_dict["completions"], stats_dict["total"])
+
+        try:
+            # Set 'SUBDIRS= ' so check doesn't descend into dirs without tests
+            chk_output = run_cmd(f"make check TESTS='{rel_name}' SUBDIRS= ", quiet=True)
+        except KeyboardInterrupt:
+            stats_dict["status"] = "ABORTED"
+            break
+
+        # Gather stats
+        end = perf_counter()
+        duration = round(end - start, 2)
+        status = "FAILED" if chk_output.returncode else "PASSED"
+        run_id = 1
+
+        # Print test result
+        print_test_line(rel_name, duration, status)
+
+        my_tup = (name, test_suite, duration, status, run_id)
+
+        # Update stats
+        stats_dict["completions"] += 1
+
+        # Combined make + check output, built before branching so any
+        # non-passing status can log it
+        total_out = (
+            f"{make_output.stdout}"
+            f"{make_output.stderr}"
+            f"{chk_output.stdout}"
+            f"{chk_output.stderr}"
+        )
+
+        if status == "FAILED":
+            fails.append(my_tup)
+            fail_name = fail_name_re.findall(name)[0].rsplit(".", 1)[0]
+            stats_dict["fails"].append(fail_name)
+
+            # Save log file
+            filepath = f"{LOG_DIR}/{rel_name}.log.failed"
+            write_str_to_new_file(f"{chk_output.stdout}\n{chk_output.stderr}", filepath)
+
+        elif status == "SKIPPED":
+            # Save log file
+            filepath = f"{LOG_DIR}/{rel_name}.log.skipped"
+            write_str_to_new_file(total_out, filepath)
+            stats_dict["skips"] += 1
+        else:
+            stats_dict["passes"] += 1
+
+        result_data_list.append((name, test_suite, duration, status, run_id))
+
+    # Clean make leftovers
+    run_cmd("make clean", quiet=True)
+
+    # Prepare result if finished a complete run
+    # (Mostly for updating the db with new durations)
+    num_results = len(result_data_list)
+    if num_results == total_tests:
+        stats_dict["status"] = "COMPLETED"
+
+    # Update the db only on a fail (with fail_fast) or complete run
+    if num_results:
+        perform(msg, insert_or_update_many, db_name, result_data_list, verbose=False)
+
+
+def filter_resume_data(stats_dict, test_data_list, result_data_list):
+    new_test_data_list = []
+    for test_data in test_data_list:
+        _id, name, test_suite, duration, status, run_id = list(test_data)
+        test_dir, rel_name = name.rsplit("/", 1)
+
+        # run_id = 0 -> fresh, run_id = 1 -> ran last time
+        if run_id > 0:
+            stats_dict["completions"] += 1
+
+            if status == "FAILED":
+                stats_dict["fails"].append(rel_name)
+            elif status == "SKIPPED":
+                stats_dict["skips"] += 1
+            else:
+                stats_dict["passes"] += 1
+
+            result_data_list.append((name, test_suite, duration, status, run_id))
+        else:
+            new_test_data_list.append(test_data)
+    return (stats_dict, new_test_data_list, result_data_list)
+
+
+def get_unit_run_stats():
+    return stats_dict
diff --git a/testsuite/src/lib/utils/cmds.py b/testsuite/src/lib/utils/cmds.py
new file mode 100644
index 00000000000..2ee2855df05
--- /dev/null
+++ b/testsuite/src/lib/utils/cmds.py
@@ -0,0 +1,91 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import os, shlex, subprocess, sys, time
+
+# SchedMD
+from utils.log import (
+    log,
+    log_new_line,
+)
+
+CMD_HOOK = "CMD: "
+
+
+# Run functions with stats
+def perform(action_desc, function, *args, verbose=True, new_line=False, decor=".."):
+    log_func = log
+    nl = ""
+
+    if new_line:
+        log_func = log_new_line
+        nl = "\n"
+
+    # action_desc: e.g. "Cloning repo", "Building Slurm"
+    if verbose:
+        log_func(f"[{decor}{action_desc}{decor}]{nl}")
+
+    start = time.perf_counter()
+    result = function(*args)
+    finish = time.perf_counter()
+    time_stat = round(finish - start, 2)
+
+    if verbose:
+        log_func(f"[{decor}{action_desc}{decor}] finished in {time_stat} seconds")
+
+    return result
+
+
+def run_cmd(cmd, env=None, quiet=False, print_output=False, timeout=None, shell=False):
+    if not quiet:
+        log(CMD_HOOK + cmd)
+
+    # If shell is specified, then let the shell split and parse the cmd string
+    if not shell:
+        cmd = shlex.split(cmd)
+
+    if print_output:
+        std_out = sys.stdout
+        std_err = sys.stderr
+    else:
+        std_out = subprocess.PIPE
+        std_err = subprocess.PIPE
+
+    output = subprocess.run(
+        cmd,
+        env=env,
+        stdout=std_out,
+        stderr=std_err,
+        timeout=timeout,
+        shell=shell,
+        text=True,
+    )
+
+    if (
+        not quiet
+        and not print_output
+        and output.returncode != 0
+        and output.stderr != ""
+    ):
+        log("Error: %s" % output.stderr)
+
+    # Access rc from output elsewhere with output.returncode
+    return output
+
+
+def run_cmd_or_exit(
+    cmd, msg, rc=0, quiet=False, print_output=False, timeout=None, shell=False
+):
+    output = run_cmd(
+        cmd, quiet=quiet, print_output=print_output, timeout=timeout, shell=shell
+    )
+
+    if output.returncode != rc:
+        log(f"{msg}")
+        log(f"'{cmd}' failed with returncode {output.returncode}")
+        log(f"stderr: {output.stderr}")
+        log(f"stdout: {output.stdout}")
+
+        sys.exit("Exiting")
+    else:
+        return output
diff --git a/testsuite/src/lib/utils/conf.py b/testsuite/src/lib/utils/conf.py
new file mode 100644
index 00000000000..99a5cea2d54
--- /dev/null
+++ b/testsuite/src/lib/utils/conf.py
@@ -0,0 +1,18 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import re
+from collections import OrderedDict
+
+
+def get_vars_from_conf(filename, comment="#"):
+    data = OrderedDict()
+
+    with open(filename) as f:
+        for line in f:
+            if not line.startswith(comment):
+                if re.search(r"\s*\S+\s*=\s*\S+\s*$", line):
+                    name, val = line.strip().replace(" ", "").split("=")
+                    data[name] = val
+
+    return data
diff --git a/testsuite/src/lib/utils/fs.py b/testsuite/src/lib/utils/fs.py
new file mode 100644
index 00000000000..717e52bed77
--- /dev/null
+++ b/testsuite/src/lib/utils/fs.py
@@ -0,0 +1,75 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import os, pathlib, re, shutil, sys
+
+# SchedMD
+from utils.cmds import (
+    run_cmd,
+)
+from utils.log import (
+    log,
+    log_header,
+    log_new_line,
+)
+
+
+# Filesystem
+def cat(filename):
+    with open(filename) as file:
+        return file.read().replace("\n", "")
+
+
+def cp_files_by_re(src_dir, dest_dir, pattern):
+    for filename in os.listdir(src_dir):
+        filepath = os.path.join(src_dir, filename)
+        if os.path.isfile(filepath):
+            if re.search(rf"{pattern}", filepath):
+                shutil.copy(filepath, dest_dir)
+
+
+def cp_expect_logs(src_dir, dest_dir):
+    log(f"Copying expect test logs from {src_dir} to {dest_dir}")
+    cp_files_by_re(src_dir, dest_dir, "testrun-results.json")
+    cp_files_by_re(src_dir, dest_dir, r"test\D+\.log.*")
+
+
+def create_dir(dir):
+    log(f"Creating {dir}")
+    pathlib.Path(dir).mkdir(parents=True, exist_ok=True)
+
+
+def delete_file(filename):
+    try:
+        os.remove(filename)
+    except OSError:
+        pass
+
+
+def write_str_to_new_file(string, filename):
+    delete_file(filename)
+    with open(filename, "w") as f:
+        f.write(string)
+
+
+def file_exists(filename):
+    return os.path.isfile(filename)
+
+
+def exit_if_not_exists(filename):
+    if not file_exists(filename):
+        log_header("CONFIGURATION ERROR")
+        log(f"ERROR: {filename} not found:")
+        sys.exit("\nExiting..")
+
+
+def remove_dir(dir):
+    log("Removing: " + dir)
+    if os.path.isdir(dir):
+        # Make sure we own the tree before removing it
+        uid = os.getuid()
+        gid = os.getgid()
+        cmd = f"sudo chown -R {uid}:{gid} {dir}"
+        run_cmd(cmd)
+
+        shutil.rmtree(dir)
+        log(f"Removing: - Done! - {dir}")
+    else:
+        log(f"Removing: - Not Found - {dir}")
diff --git a/testsuite/src/lib/utils/log.py b/testsuite/src/lib/utils/log.py
new file mode 100644
index 00000000000..b2a5d59ac60
--- /dev/null
+++ b/testsuite/src/lib/utils/log.py
@@ -0,0 +1,34 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+PREFIX = "run-tests: "
+BAR = "=" * 5
+
+
+# Logging
+def log(text, prefix=PREFIX):
+    print(f"{prefix}{text}")
+
+
+def log_new_line(text):
+    log(text, f"\n{PREFIX}")
+
+
+def log_header(header):
+    log(f"\n{BAR} {PREFIX}< {header} > {BAR}\n", "")
+
+
+def log_footer(footer):
+    log(f"\n{BAR} {PREFIX} {BAR}\n", "")
+
+
+def log_if_exists(var):
+    if var is not None and len(var) > 0:
+        log(var)
+
+
+def print_pretty_dict(d):
+    print("\n{")
+    for k, v in d.items():
+        print(f"  {k}: {v}")
+    print("}\n")
diff --git a/testsuite/src/lib/utils/ps.py b/testsuite/src/lib/utils/ps.py
new file mode 100644
index 00000000000..f85b6a52f27
--- /dev/null
+++ b/testsuite/src/lib/utils/ps.py
@@ -0,0 +1,30 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+# SchedMD
+from utils.log import (
+    log,
+)
+from utils.cmds import (
+    run_cmd,
+)
+
+
+def get_pids_from_exe(exe_path, verbose=True) -> list:
+    if verbose:
+        log(f"Retrieving pids from {exe_path}")
+
+    # NOTE do we need a run_cmd_output?
+    # run_cmd returns a CompletedProcess, so parse the pids out of its stdout
+    pid_list = run_cmd(f"pidof {exe_path}").stdout.strip().split()
+    return pid_list
+
+
+# TODO add a repeat_until to this with timeout
+def kill_pids_from_exe(exe_path):
+    for pid in get_pids_from_exe(exe_path):
+        run_cmd(f"kill {pid}")
+
+
+def is_tool(tool):
+    from shutil import which
+    return which(tool) is not None
diff --git a/testsuite/src/lib/utils/test/test_list.py b/testsuite/src/lib/utils/test/test_list.py
new file mode 100644
index 00000000000..5f176ef5689
--- /dev/null
+++ b/testsuite/src/lib/utils/test/test_list.py
@@ -0,0 +1,60 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import os, re
+
+# SchedMD
+from ..cmds import (
+    perform,
+    run_cmd,
+)
+from ..log import (
+    log,
+)
+
+
+def get_unit_tl_from_build_dir(build_unit_base_dir):
+    return perform(
+        "Running Make(s)",
+        setup_unit_tests,
+        build_unit_base_dir
+    )
+
+
+def setup_unit_tests(build_unit_base_dir):
+    # Returns a list of unit tests via the Makefiles in the build/build dir
+    test_list = []
+
+    for subdir, dirs, files in os.walk(build_unit_base_dir):
+        for file in files:
+            if file == "Makefile":
+                os.chdir(subdir)
+                run_cmd("make -j", shell=True, quiet=True, print_output=False)
+
+                cmd = """
+                echo 'print: ; @echo "$(TESTS)"' | make -f Makefile -f - print
+                """
+                result = run_cmd(cmd, shell=True, quiet=True)
+                output = result.stdout.strip().split(" ")
+                if result.returncode == 0:
+                    for name in output:
+                        if len(name) > 0:
+                            test_list.append(f"{subdir}/{name}.c")
+
+                # NOTE: Uncomment for more verbosity if adding a -v option later
+                #else:
+                #    print(f"Warning bad make output: {result.stdout}{result.stderr}")
+
+    return list(set(test_list))
+
+
+def get_tl_from_dir(base_dir, re_pattern, append=""):
+    file_re = re.compile(re_pattern)
+    test_list = []
+
+    for subdir, dirs, files in os.walk(base_dir):
+        for file in files:
+            # Keep only filenames that fully match the given pattern
+            if file_re.fullmatch(file):
+                test_list.append(f"{append}{file}")
+
+    return list(set(test_list))
diff --git a/testsuite/src/seed_data b/testsuite/src/seed_data
new file mode 100644
index 00000000000..6d05b2ae9c3
--- /dev/null
+++ b/testsuite/src/seed_data
@@ -0,0 +1,653 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
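+# Seed timing data used to pre-populate the local test db; when loaded,
+# create_seed_data_from_file() re-uses the last column as an empty status
+# and appends a fresh run_id of 0.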
+############################################################################ +# cmd: ./get_test_data.py -b 23.11 -e 35 -p 34 +# name, test_suite, duration, test_run_id +python/tests/test_111_1.py,python,0.02,34 +python/tests/test_113_2.py,python,0.02,34 +python/tests/test_114_1.py,python,0.02,34 +python/tests/test_109_1.py,python,0.02,34 +python/tests/test_117_4.py,python,0.02,34 +python/tests/test_108_2.py,python,0.02,34 +python/tests/test_117_1.py,python,0.02,34 +python/tests/test_136_1.py,python,0.02,34 +python/tests/test_136_2.py,python,0.02,34 +python/tests/test_112_1.py,python,0.02,34 +python/tests/test_107_1.py,python,0.03,34 +python/tests/test_108_3.py,python,0.03,34 +python/tests/test_113_3.py,python,0.03,34 +python/tests/test_117_2.py,python,0.03,34 +python/tests/test_118_1.py,python,0.03,34 +python/tests/test_102_2.py,python,0.03,34 +python/tests/test_103_1.py,python,0.03,34 +python/tests/test_104_1.py,python,0.03,34 +python/tests/test_105_4.py,python,0.03,34 +python/tests/test_106_1.py,python,0.03,34 +python/tests/test_107_2.py,python,0.03,34 +python/tests/test_110_1.py,python,0.03,34 +python/tests/test_111_2.py,python,0.03,34 +python/tests/test_113_4.py,python,0.03,34 +python/tests/test_114_2.py,python,0.03,34 +python/tests/test_116_31.py,python,0.03,34 +python/tests/test_117_3.py,python,0.03,34 +python/tests/test_116_39.py,python,0.03,34 +python/tests/test_113_1.py,python,0.03,34 +python/tests/test_116_15.py,python,0.03,34 +python/tests/test_123_1.py,python,0.04,34 +expect/test6.6,expect,0.07,35 +expect/test21.4,expect,0.07,35 +expect/test38.7,expect,0.07,35 +expect/test17.22,expect,0.07,35 +expect/test1.4,expect,0.08,35 +expect/test1.5,expect,0.08,35 +expect/test5.8,expect,0.08,35 +expect/test6.1,expect,0.08,35 +expect/test1.24,expect,0.08,35 +expect/test15.2,expect,0.08,35 +expect/test15.3,expect,0.08,35 +expect/test17.1,expect,0.08,35 +expect/test21.2,expect,0.08,35 +expect/test21.3,expect,0.08,35 +expect/test3.12,expect,0.08,35 +expect/test32.2,expect,0.08,35 +expect/test32.3,expect,0.08,35 +expect/test34.1,expect,0.08,35 +expect/test35.3,expect,0.08,35 +expect/test35.6,expect,0.08,35 +expect/test40.3,expect,0.08,35 +expect/test40.5,expect,0.08,35 +expect/test15.26,expect,0.08,35 +expect/test39.14,expect,0.08,35 +expect/test4.9,expect,0.09,35 +expect/test5.1,expect,0.09,35 +expect/test6.2,expect,0.09,35 +expect/test7.4,expect,0.09,35 +expect/test1.40,expect,0.09,35 +expect/test1.95,expect,0.09,35 +expect/test15.1,expect,0.09,35 +expect/test16.1,expect,0.09,35 +expect/test16.2,expect,0.09,35 +expect/test16.3,expect,0.09,35 +expect/test17.2,expect,0.09,35 +expect/test19.1,expect,0.09,35 +expect/test2.12,expect,0.09,35 +expect/test21.1,expect,0.09,35 +expect/test27.3,expect,0.09,35 +expect/test35.1,expect,0.09,35 +expect/test35.2,expect,0.09,35 +expect/test35.4,expect,0.09,35 +expect/test35.5,expect,0.09,35 +expect/test4.11,expect,0.09,35 +expect/test1.116,expect,0.09,35 +expect/test15.11,expect,0.09,35 +expect/test15.30,expect,0.09,35 +expect/test15.34,expect,0.09,35 +expect/test17.21,expect,0.09,35 +expect/test28.12,expect,0.09,35 +expect/test32.12,expect,0.09,35 +expect/test2.2,expect,0.10,35 +expect/test2.6,expect,0.10,35 +expect/test2.9,expect,0.10,35 +expect/test4.1,expect,0.10,35 +expect/test4.2,expect,0.10,35 +expect/test4.7,expect,0.10,35 +expect/test5.2,expect,0.10,35 +expect/test5.7,expect,0.10,35 +expect/test7.6,expect,0.10,35 +expect/test1.88,expect,0.10,35 +expect/test1.94,expect,0.10,35 +expect/test1.97,expect,0.10,35 +expect/test12.1,expect,0.10,35 
+expect/test12.6,expect,0.10,35 +expect/test19.2,expect,0.10,35 +expect/test2.10,expect,0.10,35 +expect/test2.20,expect,0.10,35 +expect/test27.2,expect,0.10,35 +expect/test32.1,expect,0.10,35 +expect/test32.4,expect,0.10,35 +expect/test34.2,expect,0.10,35 +expect/test40.1,expect,0.10,35 +expect/test40.4,expect,0.10,35 +expect/test1.102,expect,0.10,35 +expect/test1.109,expect,0.10,35 +expect/test15.12,expect,0.10,35 +expect/test15.25,expect,0.10,35 +expect/test15.36,expect,0.10,35 +expect/test17.28,expect,0.10,35 +expect/test17.57,expect,0.10,35 +expect/test17.59,expect,0.10,35 +expect/test32.10,expect,0.10,35 +expect/test32.11,expect,0.10,35 +python/tests/test_115_1.py,python,0.10,34 +expect/test1.3,expect,0.11,35 +expect/test2.1,expect,0.11,35 +expect/test2.3,expect,0.11,35 +expect/test4.4,expect,0.11,35 +expect/test4.5,expect,0.11,35 +expect/test1.65,expect,0.11,35 +expect/test1.96,expect,0.11,35 +expect/test27.1,expect,0.11,35 +expect/test32.5,expect,0.11,35 +expect/test32.7,expect,0.11,35 +expect/test32.9,expect,0.11,35 +expect/test4.10,expect,0.11,35 +expect/test40.2,expect,0.11,35 +expect/test40.8,expect,0.11,35 +expect/test1.106,expect,0.11,35 +expect/test1.108,expect,0.11,35 +expect/test38.13,expect,0.11,35 +expect/test1.99,expect,0.12,35 +expect/test14.1,expect,0.12,35 +expect/test14.2,expect,0.12,35 +expect/test14.3,expect,0.12,35 +expect/test19.3,expect,0.12,35 +expect/test20.4,expect,0.12,35 +expect/test32.8,expect,0.12,35 +expect/test17.24,expect,0.12,35 +expect/test17.53,expect,0.12,35 +expect/test2.4,expect,0.13,35 +expect/test4.3,expect,0.13,35 +expect/test4.8,expect,0.13,35 +expect/test1.18,expect,0.13,35 +expect/test1.64,expect,0.13,35 +expect/test17.3,expect,0.13,35 +expect/test23.1,expect,0.13,35 +expect/test27.5,expect,0.13,35 +expect/test32.6,expect,0.13,35 +expect/test36.1,expect,0.13,35 +expect/test7.16,expect,0.13,35 +expect/test17.10,expect,0.13,35 +expect/test39.20,expect,0.13,35 +python/tests/test_102_1.py,python,0.13,34 +expect/test6.3,expect,0.14,35 +expect/test6.5,expect,0.14,35 +expect/test12.5,expect,0.14,35 +expect/test19.9,expect,0.14,35 +expect/test20.5,expect,0.14,35 +expect/test38.1,expect,0.14,35 +expect/test40.6,expect,0.14,35 +expect/test6.17,expect,0.14,35 +expect/test17.41,expect,0.14,35 +expect/test20.10,expect,0.14,35 +expect/test2.5,expect,0.15,35 +expect/test3.3,expect,0.15,35 +expect/test5.5,expect,0.15,35 +expect/test1.43,expect,0.15,35 +expect/test17.9,expect,0.15,35 +expect/test19.8,expect,0.15,35 +expect/test2.24,expect,0.15,35 +expect/test37.5,expect,0.15,35 +expect/test3.4,expect,0.16,35 +expect/test6.4,expect,0.16,35 +expect/test3.13,expect,0.16,35 +expect/test36.3,expect,0.16,35 +expect/test12.11,expect,0.16,35 +expect/test17.40,expect,0.16,35 +expect/test28.4,expect,0.17,35 +expect/test6.11,expect,0.17,35 +expect/test7.18,expect,0.17,35 +expect/test17.23,expect,0.17,35 +expect/test39.11,expect,0.17,35 +python/tests/test_133_1.py,python,0.17,34 +expect/test3.6,expect,0.18,35 +expect/test15.7,expect,0.18,35 +expect/test38.3,expect,0.18,35 +expect/test38.5,expect,0.18,35 +expect/test20.8,expect,0.19,35 +expect/test15.31,expect,0.19,35 +expect/test20.7,expect,0.20,35 +expect/test1.103,expect,0.20,35 +expect/test17.34,expect,0.20,35 +python/tests/test_126_1.py,python,0.20,34 +python/tests/test_116_28.py,python,0.20,34 +expect/test17.11,expect,0.21,35 +expect/test17.54,expect,0.21,35 +expect/test24.2,expect,0.22,35 +expect/test28.1,expect,0.22,35 +expect/test28.3,expect,0.22,35 +expect/test6.10,expect,0.22,35 
+expect/test20.2,expect,0.23,35 +expect/test36.2,expect,0.23,35 +expect/test15.13,expect,0.23,35 +expect/test7.1,expect,0.24,35 +expect/test15.9,expect,0.24,35 +expect/test15.18,expect,0.24,35 +expect/test17.19,expect,0.24,35 +expect/test17.31,expect,0.24,35 +expect/test15.6,expect,0.25,35 +expect/test20.6,expect,0.25,35 +expect/test20.9,expect,0.25,35 +expect/test19.6,expect,0.26,35 +expect/test20.11,expect,0.26,35 +expect/test39.12,expect,0.26,35 +expect/test2.26,expect,0.27,35 +expect/test7.17,expect,0.27,35 +expect/test15.23,expect,0.28,35 +expect/test4.14,expect,0.29,35 +expect/test21.25,expect,0.29,35 +expect/test15.39,expect,0.30,35 +expect/test37.2,expect,0.31,35 +expect/test17.25,expect,0.31,35 +expect/test21.41,expect,0.31,35 +expect/test3.9,expect,0.32,35 +expect/test3.18,expect,0.32,35 +expect/test37.3,expect,0.32,35 +expect/test39.4,expect,0.35,35 +expect/test1.11,expect,0.36,35 +expect/test1.16,expect,0.36,35 +expect/test15.4,expect,0.36,35 +expect/test1.51,expect,0.37,35 +expect/test6.14,expect,0.37,35 +expect/test1.117,expect,0.37,35 +expect/test15.15,expect,0.37,35 +expect/test1.10,expect,0.38,35 +expect/test27.4,expect,0.39,35 +expect/test15.10,expect,0.39,35 +expect/test15.21,expect,0.39,35 +expect/test1.1,expect,0.40,35 +expect/test15.16,expect,0.40,35 +expect/test1.71,expect,0.41,35 +expect/test1.13,expect,0.42,35 +expect/test1.56,expect,0.42,35 +expect/test1.50,expect,0.43,35 +expect/test39.9,expect,0.43,35 +expect/test20.13,expect,0.43,35 +expect/test28.5,expect,0.45,35 +expect/test1.27,expect,0.46,35 +expect/test1.34,expect,0.47,35 +expect/test1.91,expect,0.47,35 +expect/test1.83,expect,0.49,35 +expect/test4.13,expect,0.50,35 +expect/test1.37,expect,0.53,35 +expect/test1.70,expect,0.53,35 +expect/test1.9,expect,0.56,35 +expect/test15.24,expect,0.56,35 +python/tests/test_130_1.py,python,0.56,34 +expect/test41.1,expect,0.57,35 +expect/test39.1,expect,0.58,35 +expect/test7.15,expect,0.58,35 +python/tests/test_116_47.py,python,0.59,34 +expect/test1.85,expect,0.61,35 +expect/test1.6,expect,0.64,35 +expect/test21.37,expect,0.65,35 +expect/test39.6,expect,0.70,35 +expect/test39.16,expect,0.70,35 +expect/test1.20,expect,0.71,35 +expect/test1.87,expect,0.71,35 +expect/test1.2,expect,0.76,35 +expect/test7.2,expect,0.78,35 +expect/test20.14,expect,0.79,35 +python/tests/test_116_40.py,python,0.85,34 +expect/test1.82,expect,0.87,35 +expect/test15.20,expect,0.87,35 +expect/test1.101,expect,0.89,35 +expect/test39.15,expect,0.89,35 +expect/test1.80,expect,0.90,35 +expect/test1.113,expect,0.90,35 +expect/test1.23,expect,0.96,35 +expect/test1.62,expect,1.09,35 +expect/test39.2,expect,1.11,35 +expect/test1.86,expect,1.15,35 +expect/test1.92,expect,1.15,35 +expect/test33.1,expect,1.17,35 +expect/test1.93,expect,1.18,35 +expect/test1.21,expect,1.23,35 +expect/test1.28,expect,1.25,35 +expect/test24.3,expect,1.25,35 +expect/test15.32,expect,1.25,35 +expect/test2.16,expect,1.26,35 +expect/test24.1,expect,1.26,35 +expect/test6.16,expect,1.26,35 +expect/test1.100,expect,1.26,35 +expect/test17.55,expect,1.26,35 +expect/test1.81,expect,1.27,35 +expect/test1.58,expect,1.29,35 +expect/test20.3,expect,1.29,35 +expect/test24.4,expect,1.30,35 +expect/test15.19,expect,1.33,35 +python/tests/test_130_2.py,python,1.34,34 +expect/test1.52,expect,1.35,35 +expect/test1.89,expect,1.41,35 +expect/test41.2,expect,1.41,35 +python/tests/test_116_35.py,python,1.43,34 +python/tests/test_105_3.py,python,1.44,34 +python/tests/test_105_1.py,python,1.51,34 +expect/test4.12,expect,1.52,35 
+python/tests/test_108_5.py,python,1.55,34 +python/tests/test_111_3.py,python,1.55,34 +python/tests/test_114_3.py,python,1.56,34 +expect/test1.30,expect,1.57,35 +expect/test1.31,expect,1.62,35 +python/tests/test_116_23.py,python,1.62,34 +python/tests/test_108_1.py,python,1.62,34 +expect/test39.3,expect,1.63,35 +expect/test1.36,expect,1.66,35 +python/tests/test_116_22.py,python,1.72,34 +expect/test1.46,expect,1.74,35 +expect/test14.10,expect,1.78,35 +python/tests/test_137_1.py,python,1.81,34 +python/tests/test_116_21.py,python,1.82,34 +expect/test22.2,expect,1.84,35 +python/tests/test_132_1.py,python,1.84,34 +python/tests/test_116_11.py,python,1.85,34 +expect/test5.11,expect,1.87,35 +expect/test2.17,expect,1.88,35 +python/tests/test_108_4.py,python,1.93,34 +python/tests/test_119_1.py,python,1.96,34 +python/tests/test_122_1.py,python,2.01,34 +expect/test39.18,expect,2.02,35 +python/tests/test_116_6.py,python,2.10,34 +expect/test39.8,expect,2.12,35 +expect/test17.26,expect,2.15,35 +expect/test2.7,expect,2.16,35 +expect/test17.4,expect,2.16,35 +expect/test17.43,expect,2.16,35 +python/tests/test_116_4.py,python,2.16,34 +expect/test17.13,expect,2.18,35 +python/tests/test_116_32.py,python,2.22,34 +expect/test14.6,expect,2.24,35 +python/tests/test_116_8.py,python,2.24,34 +python/tests/test_116_41.py,python,2.26,34 +expect/test6.9,expect,2.30,35 +expect/test1.54,expect,2.35,35 +expect/test1.118,expect,2.43,35 +expect/test1.55,expect,2.44,35 +python/tests/test_135_1.py,python,2.44,34 +expect/test1.77,expect,2.54,35 +expect/test1.32,expect,2.55,35 +expect/test1.63,expect,2.59,35 +expect/test21.7,expect,2.62,35 +expect/test3.16,expect,2.64,35 +expect/test39.23,expect,2.65,35 +python/tests/test_116_9.py,python,2.65,34 +python/tests/test_121_1.py,python,2.65,34 +python/tests/test_124_1.py,python,2.66,34 +expect/test1.59,expect,2.67,35 +python/tests/test_109_2.py,python,2.68,34 +expect/test15.22,expect,2.73,35 +expect/test7.5,expect,2.74,35 +expect/test1.15,expect,2.74,35 +expect/test21.5,expect,2.75,35 +python/tests/test_116_26.py,python,2.78,34 +expect/test2.25,expect,2.81,35 +expect/test21.29,expect,2.88,35 +python/tests/test_116_20.py,python,2.90,34 +expect/test17.20,expect,2.91,35 +python/tests/test_116_17.py,python,2.95,34 +python/tests/test_116_34.py,python,2.95,34 +python/tests/test_116_18.py,python,2.98,34 +expect/test8.14,expect,3.10,35 +expect/test17.32,expect,3.16,35 +expect/test17.45,expect,3.18,35 +expect/test1.45,expect,3.19,35 +expect/test17.16,expect,3.19,35 +expect/test14.5,expect,3.20,35 +expect/test14.7,expect,3.22,35 +expect/test15.8,expect,3.24,35 +expect/test14.8,expect,3.26,35 +python/tests/test_116_1.py,python,3.27,34 +python/tests/test_116_37.py,python,3.28,34 +expect/test1.104,expect,3.29,35 +expect/test24.5,expect,3.44,35 +python/tests/test_116_5.py,python,3.47,34 +python/tests/test_116_42.py,python,3.49,34 +expect/test17.63,expect,3.53,35 +expect/test1.84,expect,3.66,35 +expect/test21.6,expect,3.67,35 +expect/test15.33,expect,3.69,35 +expect/test1.38,expect,3.73,35 +expect/test7.14,expect,3.85,35 +python/tests/test_116_38.py,python,3.86,34 +expect/test15.27,expect,3.89,35 +expect/test21.12,expect,3.90,35 +expect/test25.1,expect,3.92,35 +expect/test21.10,expect,4.00,35 +python/tests/test_119_2.py,python,4.07,34 +expect/test5.3,expect,4.10,35 +expect/test4.6,expect,4.11,35 +expect/test1.105,expect,4.14,35 +expect/test17.29,expect,4.17,35 +expect/test20.1,expect,4.22,35 +expect/test5.4,expect,4.30,35 +expect/test6.8,expect,4.30,35 +expect/test2.14,expect,4.33,35 
+expect/test7.12,expect,4.40,35 +python/tests/test_102_3.py,python,4.45,34 +expect/test21.36,expect,4.49,35 +expect/test21.31,expect,4.52,35 +python/tests/test_116_46.py,python,4.56,34 +expect/test1.90,expect,4.58,35 +expect/test1.17,expect,4.65,35 +python/tests/test_105_2.py,python,4.73,34 +expect/test5.10,expect,4.75,35 +expect/test7.19,expect,4.89,35 +expect/test7.13,expect,4.93,35 +python/tests/test_116_19.py,python,4.98,34 +expect/test1.33,expect,5.00,35 +expect/test14.9,expect,5.15,35 +expect/test17.17,expect,5.17,35 +expect/test16.4,expect,5.23,35 +expect/test36.4,expect,5.23,35 +expect/test21.33,expect,5.24,35 +python/tests/test_116_24.py,python,5.24,34 +expect/test1.66,expect,5.25,35 +expect/test14.4,expect,5.26,35 +expect/test38.18,expect,5.26,35 +expect/test39.21,expect,5.26,35 +python/tests/test_101_1.py,python,5.28,34 +expect/test17.8,expect,5.32,35 +expect/test37.4,expect,5.42,35 +expect/test37.15,expect,5.42,35 +expect/test37.6,expect,5.43,35 +expect/test37.13,expect,5.43,35 +expect/test37.7,expect,5.44,35 +expect/test37.16,expect,5.44,35 +expect/test37.10,expect,5.45,35 +expect/test37.12,expect,5.45,35 +expect/test37.14,expect,5.45,35 +expect/test37.11,expect,5.46,35 +expect/test1.72,expect,5.47,35 +expect/test37.8,expect,5.47,35 +expect/test37.9,expect,5.47,35 +python/tests/test_113_5.py,python,5.52,34 +expect/test21.9,expect,5.56,35 +expect/test28.6,expect,5.58,35 +expect/test21.11,expect,5.62,35 +expect/test21.13,expect,5.63,35 +expect/test7.10,expect,5.67,35 +expect/test21.8,expect,5.77,35 +expect/test3.17,expect,5.77,35 +python/tests/test_116_14.py,python,6.01,34 +expect/test21.42,expect,6.18,35 +python/tests/test_116_30.py,python,6.18,34 +expect/test17.6,expect,6.25,35 +python/tests/test_116_43.py,python,6.25,34 +expect/test39.7,expect,6.32,35 +expect/test21.17,expect,6.47,35 +expect/test21.19,expect,6.64,35 +expect/test1.114,expect,6.95,35 +expect/test1.107,expect,6.96,35 +expect/test12.3,expect,6.97,35 +expect/test39.10,expect,7.02,35 +expect/test2.11,expect,7.26,35 +expect/test17.33,expect,7.27,35 +expect/test12.9,expect,7.41,35 +expect/test1.8,expect,7.49,35 +expect/test3.10,expect,7.57,35 +expect/test15.38,expect,7.78,35 +expect/test1.22,expect,7.89,35 +expect/test21.18,expect,7.93,35 +expect/test15.35,expect,7.97,35 +expect/test21.15,expect,8.01,35 +expect/test21.14,expect,8.03,35 +python/tests/test_116_7.py,python,8.08,34 +expect/test21.22,expect,8.15,35 +expect/test21.16,expect,8.18,35 +python/tests/test_128_1.py,python,8.19,34 +python/tests/test_116_16.py,python,8.31,34 +expect/test1.35,expect,8.34,35 +expect/test1.120,expect,8.60,35 +expect/test17.56,expect,9.03,35 +expect/test19.4,expect,9.28,35 +expect/test17.27,expect,9.35,35 +expect/test3.1,expect,9.37,35 +expect/test38.19,expect,9.44,35 +expect/test21.20,expect,9.57,35 +expect/test21.32,expect,9.74,35 +expect/test21.38,expect,10.04,35 +expect/test17.14,expect,10.35,35 +expect/test1.115,expect,10.38,35 +python/tests/test_129_1.py,python,10.41,34 +expect/test21.27,expect,10.66,35 +expect/test1.29,expect,10.68,35 +python/tests/test_116_2.py,python,10.70,34 +expect/test21.24,expect,11.13,35 +expect/test28.13,expect,11.19,35 +expect/test3.2,expect,11.23,35 +expect/test17.58,expect,11.29,35 +expect/test19.7,expect,11.33,35 +expect/test17.62,expect,11.41,35 +expect/test1.60,expect,11.42,35 +expect/test7.23,expect,11.43,35 +expect/test23.2,expect,11.45,35 +expect/test21.28,expect,11.82,35 +expect/test28.10,expect,12.52,35 +python/tests/test_127_1.py,python,12.72,34 +expect/test21.26,expect,12.91,35 
+expect/test31.3,expect,13.01,35 +expect/test7.9,expect,13.79,35 +python/tests/test_134_1.py,python,14.02,34 +expect/test5.9,expect,14.53,35 +expect/test1.119,expect,14.64,35 +expect/test28.2,expect,15.33,35 +expect/test2.8,expect,15.55,35 +expect/test1.76,expect,15.88,35 +expect/test1.44,expect,15.99,35 +expect/test2.27,expect,16.15,35 +expect/test17.36,expect,16.17,35 +expect/test5.6,expect,16.45,35 +python/tests/test_116_3.py,python,16.54,34 +python/tests/test_121_2.py,python,16.87,34 +expect/test38.2,expect,17.45,35 +expect/test20.15,expect,17.45,35 +expect/test28.8,expect,17.52,35 +expect/test20.12,expect,17.58,35 +expect/test24.6,expect,17.62,35 +expect/test21.35,expect,18.26,35 +expect/test9.8,expect,18.40,35 +expect/test9.7,expect,18.48,35 +expect/test8.13,expect,18.79,35 +expect/test2.18,expect,19.09,35 +python/tests/test_116_13.py,python,19.32,34 +python/tests/test_116_25.py,python,19.81,34 +expect/test17.37,expect,21.39,35 +expect/test2.23,expect,21.52,35 +expect/test1.49,expect,21.68,35 +expect/test2.19,expect,21.77,35 +expect/test22.1,expect,22.18,35 +python/tests/test_116_44.py,python,22.34,34 +python/tests/test_116_12.py,python,22.42,34 +expect/test7.21,expect,22.52,35 +expect/test2.21,expect,22.55,35 +expect/test1.12,expect,22.59,35 +expect/test24.7,expect,22.83,35 +python/tests/test_138_1.py,python,23.21,34 +expect/test2.22,expect,23.52,35 +expect/test38.9,expect,25.21,35 +expect/test15.14,expect,25.38,35 +expect/test38.16,expect,25.73,35 +expect/test1.14,expect,25.79,35 +expect/test17.64,expect,25.94,35 +expect/test1.112,expect,26.08,35 +expect/test1.110,expect,26.17,35 +expect/test17.52,expect,27.56,35 +expect/test28.9,expect,28.25,35 +expect/test21.44,expect,28.28,35 +expect/test38.12,expect,28.92,35 +expect/test38.17,expect,28.99,35 +expect/test38.14,expect,29.34,35 +expect/test28.7,expect,29.39,35 +expect/test38.10,expect,29.49,35 +expect/test1.74,expect,29.57,35 +expect/test21.23,expect,30.18,35 +expect/test39.22,expect,30.48,35 +expect/test1.68,expect,30.50,35 +expect/test1.25,expect,30.58,35 +expect/test38.4,expect,30.80,35 +expect/test17.60,expect,30.95,35 +expect/test38.8,expect,31.29,35 +expect/test21.43,expect,32.31,35 +expect/test3.14,expect,32.44,35 +expect/test3.15,expect,32.67,35 +expect/test38.11,expect,32.68,35 +expect/test1.19,expect,32.74,35 +expect/test17.5,expect,33.82,35 +expect/test1.111,expect,33.84,35 +expect/test38.15,expect,34.33,35 +expect/test2.15,expect,35.53,35 +expect/test28.11,expect,35.74,35 +python/tests/test_116_33.py,python,36.03,34 +expect/test15.37,expect,37.29,35 +expect/test17.18,expect,37.59,35 +expect/test17.61,expect,37.74,35 +expect/test17.39,expect,37.92,35 +expect/test12.4,expect,39.42,35 +expect/test17.42,expect,42.14,35 +expect/test7.11,expect,43.59,35 +expect/test39.19,expect,45.37,35 +expect/test37.1,expect,45.45,35 +expect/test17.51,expect,45.60,35 +expect/test12.2,expect,46.46,35 +expect/test6.15,expect,48.25,35 +expect/test1.67,expect,50.52,35 +expect/test1.69,expect,50.63,35 +expect/test3.7,expect,54.72,35 +expect/test31.2,expect,55.07,35 +python/tests/test_125_1.py,python,55.11,34 +expect/test7.7,expect,57.87,35 +expect/test12.10,expect,61.71,35 +expect/test38.6,expect,61.79,35 +expect/test39.5,expect,69.15,35 +expect/test19.5,expect,69.26,35 +expect/test12.8,expect,69.50,35 +expect/test21.39,expect,69.53,35 +expect/test21.21,expect,69.84,35 +expect/test1.61,expect,70.84,35 +expect/test17.12,expect,73.71,35 +expect/test3.5,expect,73.78,35 +expect/test1.41,expect,76.85,35 +expect/test2.13,expect,81.42,35 
+expect/test39.17,expect,81.52,35 +expect/test6.7,expect,82.45,35 +expect/test7.24,expect,82.49,35 +expect/test21.40,expect,83.45,35 +python/tests/test_116_36.py,python,91.16,34 +python/tests/test_116_27.py,python,95.46,34 +expect/test17.44,expect,96.01,35 +expect/test21.30,expect,96.13,35 +expect/test1.53,expect,100.14,35 +expect/test7.20,expect,103.23,35 +expect/test1.26,expect,105.12,35 +expect/test9.9,expect,114.26,35 +expect/test6.13,expect,115.62,35 +expect/test1.48,expect,124.98,35 +expect/test7.22,expect,129.60,35 +expect/test9.5,expect,135.30,35 +expect/test9.3,expect,135.47,35 +expect/test9.2,expect,140.30,35 +expect/test9.1,expect,140.35,35 +expect/test9.6,expect,141.06,35 +expect/test39.13,expect,149.02,35 +expect/test3.11,expect,154.43,35 +python/tests/test_116_45.py,python,181.36,34 +expect/test21.34,expect,182.14,35 +expect/test1.75,expect,185.76,35 +expect/test37.17,expect,199.60,35 +expect/test1.7,expect,266.54,35 +expect/test15.5,expect,266.56,35 +expect/test17.38,expect,308.49,35 +expect/test1.73,expect,337.49,35 +expect/test12.7,expect,343.62,35 +expect/test3.8,expect,442.70,35 +expect/test9.4,expect,448.89,35 +expect/test8.12,expect,512.52,35 From 777f586b37d6509907183c9880de3426790d114c Mon Sep 17 00:00:00 2001 From: Jonathan de Gaston Date: Fri, 4 Aug 2023 17:27:41 +0000 Subject: [PATCH 51/81] Testsuite - Create test_123_4.py Test that jobs cannot be submitted to a reservation by users that are not granted access to that reservation. Bug 17202 --- testsuite/README | 1 + testsuite/python/tests/test_123_4.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100755 testsuite/python/tests/test_123_4.py diff --git a/testsuite/README b/testsuite/README index a3bcd53fdc9..6305a2d3d9e 100644 --- a/testsuite/README +++ b/testsuite/README @@ -962,6 +962,7 @@ test_122_2 Test job array with gres test_123_# Testing of reservations. ===================================== test_123_1 /features/reservations/test_flags.py +test_123_4 Test reservations access test_124_# Testing of AccountingStorageEnforce. ================================================= diff --git a/testsuite/python/tests/test_123_4.py b/testsuite/python/tests/test_123_4.py new file mode 100755 index 00000000000..316039204cb --- /dev/null +++ b/testsuite/python/tests/test_123_4.py @@ -0,0 +1,39 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import pytest +import atf + + +@pytest.fixture(scope='module', autouse=True) +def setup(): + atf.require_slurm_running() + yield + cleanup() + + +def cleanup(): + result = atf.run_command(f"scontrol delete reservationname={res_name}", + user=atf.properties['slurm-user']) + assert result['exit_code'] == 0, \ + "Couldn't delete the reservation!" + + +def test_reservation_user(): + """Test that a reservation created for SlurmUser can't be used by atf""" + + # Create the reservation for user slurm + global res_name + res_name = "resv1" + create_res = f"scontrol create reservationname={res_name} " \ + f"user={atf.properties['slurm-user']} start=now duration=1 nodecnt=1" + result = atf.run_command(create_res, user=atf.properties['slurm-user']) + assert result['exit_code'] == 0, \ + "Couldn't create the reservation!" 
+
+    # Try to run a job as atf
+    result = atf.run_command(f"srun -N1 --reservation={res_name} true", user=atf.properties['test-user'])
+    assert result['exit_code'] != 0, \
+        "The job should have been denied for user!"
+    assert "Access denied" in result['stderr'], \
+        "The job should have been denied for user!"
From 9baa7f951bfa319d35b894388911559f7a5e1286 Mon Sep 17 00:00:00 2001
From: Jonathan de Gaston
Date: Thu, 10 Aug 2023 17:26:29 +0000
Subject: [PATCH 52/81] Testsuite - Create test_138_2.py

Create test_138_2 to test for parallel execution of user commands.

Bug 12884
---
 testsuite/README | 1 +
 testsuite/python/tests/test_138_2.py | 93 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 testsuite/python/tests/test_138_2.py

diff --git a/testsuite/README b/testsuite/README
index 6305a2d3d9e..1f96e69db7b 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -1022,6 +1022,7 @@ test_137_1 /plugins/node_features/helpers/test_plugin.py
 test_138_# Testing stressing cases.
 =====================================
 test_138_1 /stress/test_stdin_broadcast.py
+test_138_2 Test parallel performance of squeue
 
 test_143_# Testing --switches.
 ================================

diff --git a/testsuite/python/tests/test_138_2.py b/testsuite/python/tests/test_138_2.py
new file mode 100644
index 00000000000..3ee4698a0c4
--- /dev/null
+++ b/testsuite/python/tests/test_138_2.py
@@ -0,0 +1,93 @@
+############################################################################
+# Copyright (C) SchedMD LLC.
+############################################################################
+import pytest
+import atf
+
+
+@pytest.fixture(scope="module", autouse=True)
+def setup():
+    atf.require_slurm_running()
+
+
+def make_and_run_bash(lines: list[str]) -> None:
+    """Make and run the bash script.
+    Input is a list of lines to be run as a bash script."""
+    script_name = "script.sh"
+    script = "\n".join(lines)
+    atf.make_bash_script(script_name, script)
+    atf.run_command(f"bash {script_name}")
+
+
+@pytest.mark.parametrize("command, phrase", [
+    ("sinfo", "NODELIST"),
+    ("scontrol show node", "NodeName="),
+])
+def test_parallel(command, phrase):
+    """Test that sinfo and scontrol can be run in parallel. We submit
+    1000 user commands to Slurm to make sure that it doesn't crash. We then
+    check the output to verify that the correct number of commands ran."""
+
+    script_out = str(atf.module_tmp_path / f"{command[:5]}.out")
+    # Cancel all jobs so that the queue is empty
+    atf.cancel_all_jobs()
+
+    script_lines = [
+        'for i in $(seq 1 1000)',
+        f'    do {command} &',
+        f'done > {script_out}',
+        'wait',
+    ]
+    make_and_run_bash(script_lines)
+
+    output = atf.run_command_output(f"cat {script_out} | grep -c '{phrase}'")
+    assert int(output) == 1000, \
+        f"We expected 1000 commands to be run in parallel, but got {output}"
+
+
+def test_squeue_parallel():
+    """Test that when lots of squeue calls are made, all the commands still
+    run correctly. To test this we submit 100 jobs, then we run 1000 squeue
+    commands and make sure that all 1000 worked and produced output."""
+
+    script_out = str(atf.module_tmp_path / "squeue.out")
+    # Cancel all jobs so that the queue is empty
+    atf.cancel_all_jobs()
+    # Submit 100 jobs to fill up the queue
+    for i in range(100):
+        atf.submit_job_sbatch("--wrap='sleep 100'")
+
+    script_lines = [
+        'for i in $(seq 1 1000)',
+        '    do squeue &',
+        f'done > {script_out}',
+        'wait',
+    ]
+    make_and_run_bash(script_lines)
+
+    output = atf.run_command_output(f"cat {script_out} | grep -c 'JOBID'")
+    assert int(output) == 1000, \
+        f"We expected 1000 user commands to run, but got {int(output)}"
+
+
+def test_show_jobs_parallel():
+    """Test that scontrol show job works in parallel. We submit one job and
+    then run 1000 'scontrol show job {job_id} &' commands to make sure that
+    all of them provide the correct output."""
+
+    script_out = str(atf.module_tmp_path / "job.out")
+    # Cancel all jobs so that the queue is empty
+    atf.cancel_all_jobs()
+    job_id = atf.submit_job_srun("true")
+
+    script_lines = [
+        'for i in $(seq 1 1000)',
+        f'    do scontrol show job {job_id} &',
+        f'done > {script_out}',
+        'wait'
+    ]
+    make_and_run_bash(script_lines)
+
+    output = atf.run_command_output(f'cat {script_out} | grep -c "JobId="')
+    assert int(output) == 1000, \
+        f"We expected 1000 commands to be run in parallel, but got {output}"
From 5da2fb26d0bf093938aff3498cfac8c5ee9a988c Mon Sep 17 00:00:00 2001
From: Tom Johns
Date: Fri, 11 Aug 2023 18:07:13 +0000
Subject: [PATCH 53/81] Update README

---
 testsuite/README | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/testsuite/README b/testsuite/README
index 1f96e69db7b..ed33bd7ec74 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -962,6 +962,8 @@ test_122_2 Test job array with gres
 test_123_# Testing of reservations.
 =====================================
 test_123_1 /features/reservations/test_flags.py
+test_123_2 Test RESV_DEL_HOLD
+test_123_3 Test overlapping reservations.
 test_123_4 Test reservations access
 
 test_124_# Testing of AccountingStorageEnforce.
From ad2ac781a6c4ab117d1935b86f3b53ca9c6898d8 Mon Sep 17 00:00:00 2001
From: Tom Johns
Date: Fri, 11 Aug 2023 12:34:56 -0600
Subject: [PATCH 54/81] Undo update README

---
 testsuite/README | 2 --
 1 file changed, 2 deletions(-)

diff --git a/testsuite/README b/testsuite/README
index ed33bd7ec74..1f96e69db7b 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -962,8 +962,6 @@ test_122_2 Test job array with gres
 test_123_# Testing of reservations.
 =====================================
 test_123_1 /features/reservations/test_flags.py
-test_123_2 Test RESV_DEL_HOLD
-test_123_3 Test overlapping reservations.
 test_123_4 Test reservations access
 
 test_124_# Testing of AccountingStorageEnforce.
From 0c316f46f7a9788efc20ae9f53a1a069d41b4eeb Mon Sep 17 00:00:00 2001
From: Jonathan de Gaston
Date: Tue, 8 Aug 2023 16:51:08 +0000
Subject: [PATCH 55/81] Testsuite - Create test_103_2.py

Test salloc defaults with no commands.

Bug 15415
---
 testsuite/README | 1 +
 testsuite/python/tests/test_103_2.py | 47 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 testsuite/python/tests/test_103_2.py

diff --git a/testsuite/README b/testsuite/README
index 1f96e69db7b..9eb70ba85ba 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -823,6 +823,7 @@ test_102_2 /commands/sacctmgr/test_--usage.py
 
 test_103_# Testing of salloc options.
======================================= test_103_1 /commands/salloc/test_--usage.py +test_103_2 Test salloc with default command test_104_# Testing of sattach options. ======================================== diff --git a/testsuite/python/tests/test_103_2.py b/testsuite/python/tests/test_103_2.py new file mode 100644 index 00000000000..dfc926a969a --- /dev/null +++ b/testsuite/python/tests/test_103_2.py @@ -0,0 +1,47 @@ +############################################################################ +# Copyright (C) SchedMD LLC. +############################################################################ +import atf +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def setup(): + atf.require_slurm_running() + + +def test_salloc_normal(): + """Test salloc allocations without commands. We check the stderr and + stdout because the exit codes seem to be 0 even when salloc prints error + messages. The normal allocations should have no error output with the -Q flag.""" + + # Test that normal salloc works correctly + result = atf.run_command("salloc -Q &") + assert result['exit_code'] == 0, \ + "Exit code was not 0!" + assert result['stderr'] == "", \ + f"There should be no error messages from the salloc command. Got: {result['stderr']}" + assert result['stdout'] == "", \ + f"There should be no stdout from the salloc command. Got: {result['stdout']}" + + atf.cancel_all_jobs() + + # Test that salloc -n1 works correctly + result = atf.run_command("salloc -Q -n1 &") + assert result['exit_code'] == 0, \ + "Exit code was not 0!" + assert result['stderr'] == "", \ + f"There should be no error messages from the salloc command. Got: {result['stderr']}" + assert result['stdout'] == "", \ + f"There should be no stdout from the salloc command. Got: {result['stdout']}" + + atf.cancel_all_jobs() + + # Test that salloc -n2 will wait for resources correctly with only one node + result = atf.run_command("salloc -Q -n2 &", timeout=3) + assert result['exit_code'] == 110, \ + "Exit code was not 110 (timeout)!" + assert result['stderr'] == "", \ + f"There should be no error messages from the salloc command. Got: {result['stderr']}" + assert result['stdout'] == "", \ + f"There should be no stdout from the salloc command. Got: {result['stdout']}" From 26c9d63758dc7c9a4b03c72acd890709747bfba9 Mon Sep 17 00:00:00 2001 From: Tom Johns Date: Fri, 11 Aug 2023 23:15:51 +0000 Subject: [PATCH 56/81] Testsuite - Fix README typo --- testsuite/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/README b/testsuite/README index 9eb70ba85ba..2a7106dc38f 100644 --- a/testsuite/README +++ b/testsuite/README @@ -963,7 +963,7 @@ test_122_2 Test job array with gres test_123_# Testing of reservations. ===================================== test_123_1 /features/reservations/test_flags.py -test_123_4 Test reservations access +test_123_4 Test reservation access test_124_# Testing of AccountingStorageEnforce. ================================================= From 5d47359ac75c902919d2f17c0a3d61a76e263db9 Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Wed, 23 Aug 2023 01:23:07 +0000 Subject: [PATCH 57/81] Testsuite - Fix wrong skip in test1.12 Using || instead of && in an if statement caused test1.12 to be incorrectly skipped every time, even with cons_[t]res configured.
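The fix is easier to see as a truth table: with two negated checks, || skips unless both select plugins are configured, while && skips only when neither is. A minimal sketch of that logic (Python for illustration only; should_skip is a hypothetical stand-in for the Tcl guard around check_config_select):

    # Hypothetical stand-in for the test1.12 guard (illustration only).
    # Old:  skip if (not cons_tres) or (not cons_res)
    #       -> true unless BOTH plugins are configured, but Slurm runs a
    #          single SelectType at a time, so the test always skipped.
    # New:  skip if (not cons_tres) and (not cons_res)
    #       -> skip only when NEITHER plugin is configured.
    def should_skip(has_cons_tres: bool, has_cons_res: bool) -> bool:
        return not has_cons_tres and not has_cons_res

    assert should_skip(False, False)      # neither plugin: skip
    assert not should_skip(True, False)   # cons_tres configured: run
    assert not should_skip(False, True)   # cons_res configured: run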
Bug 17496 --- testsuite/expect/test1.12 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testsuite/expect/test1.12 b/testsuite/expect/test1.12 index 9393171f34d..a95e6f2e3f6 100755 --- a/testsuite/expect/test1.12 +++ b/testsuite/expect/test1.12 @@ -193,9 +193,10 @@ if {[set_nodes_and_threads_by_request "-N1"]} { skip "Test needs to be able to submit a job with -N1." } -if {![check_config_select "cons_tres"] || ![check_config_select "cons_res"]} { +if {![check_config_select "cons_tres"] && ![check_config_select "cons_res"]} { skip "Test requires SelectType=select/cons_tres or cons_res" } + testproc test_overlap_after_overlap testproc test_overlap_after_exclusive testproc test_exclusive_after_overlap From c2b1af360af8ed32c51199de34089819907987a8 Mon Sep 17 00:00:00 2001 From: Nathan Prisbrey Date: Wed, 23 Aug 2023 01:45:01 +0000 Subject: [PATCH 58/81] Testsuite - Fix test1.12 skipping test_overlap_gpus if no CR_*MEMORY Skip test_overlap_gpus if CR_*MEMORY isn't set in SelectTypeParameters, since the subtest needs it. Bug 17496 --- testsuite/expect/test1.12 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/testsuite/expect/test1.12 b/testsuite/expect/test1.12 index a95e6f2e3f6..97826cce8e6 100755 --- a/testsuite/expect/test1.12 +++ b/testsuite/expect/test1.12 @@ -207,9 +207,7 @@ if {![param_contains [get_config_param "SelectTypeParameters"] "CR_*MEMORY"]} { testproc test_overlap_memory -run_following_testprocs - -# Only if gres/gpu is configured +# Only if gres/gpu is configured, and CR_*MEMORY if {[set_nodes_and_threads_by_request "--gres=gpu:1"] || ![param_contains [get_config_param "AccountingStorageTRES"] "*gpu"]} { skip_following_testprocs "Testproc needs to be able to submit a job with --gres=gpu:1 and AccountingStorageTRES with GPUs." } From 920384c952f31dce5c9ffeb9df15f4ed97742b3f Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Mon, 28 Aug 2023 18:46:06 +0200 Subject: [PATCH 59/81] Testsuite - Fix minor typo Bug 14976 --- testsuite/expect/globals | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/expect/globals b/testsuite/expect/globals index cf027b96f39..94fb24d164a 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -1778,7 +1778,7 @@ proc wait_for args { # OPTIONS # -fail # fail the test if the condition is not met within the timeout -# -subfail +# -subtest # if the condition is met within the timeout call subpass, otherwise # call subfail # -timeout From ae881fe20fb17e73e913b19a1c2583d2d64ef952 Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Mon, 28 Aug 2023 18:46:40 +0200 Subject: [PATCH 60/81] Testsuite - Improve test21.31 using wait_for Bug 14976 --- testsuite/expect/test21.31 | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/testsuite/expect/test21.31 b/testsuite/expect/test21.31 index 17cde73dde0..ad46e4029f4 100755 --- a/testsuite/expect/test21.31 +++ b/testsuite/expect/test21.31 @@ -144,25 +144,11 @@ if {[check_resource_limits $sr1 [array get resource1_chck]] == 1} { # use scontrol show license to verify the cluster license was created # set matches 0 -spawn $scontrol show license -expect { - -re "LicenseName=($sr1@$resource1(Server))" { - incr matches - exp_continue - } - -re "Total=$ect1 Used=0 Free=$ect1 Reserved=0 Remote=yes" { - incr matches - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -if {$matches != 2} { - fail "License output is incorrect ($matches != 2. Expected: LicenseName=$sr1@$resource1(Server).
Expected: Total=$ect1 Used=0 Free=$ect1 Reserved=0 Remote=yes)" +wait_for -subtest {$matches == 2} { + set output [run_command_output -fail "$scontrol show license"] + set matches 0 + incr matches [regexp "LicenseName=($sr1@$resource1(Server))" $output] + incr matches [regexp "Total=$ect1 Used=0 Free=$ect1 Reserved=0 Remote=yes" $output] } # From c6e78eb9013804e6dee206fd52e2666c2d360181 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Tue, 29 Aug 2023 18:57:27 +0200 Subject: [PATCH 61/81] Testsuite - Skip test21.34 for select/linear Although some subtests can run given the test node limits, most of the test fails with select/linear because all CPUs of a node are allocated regardless of the tasks or CPUs requested. Bug 15724 --- testsuite/expect/test21.34 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/testsuite/expect/test21.34 b/testsuite/expect/test21.34 index f5776289cf5..5ce3b500482 100755 --- a/testsuite/expect/test21.34 +++ b/testsuite/expect/test21.34 @@ -164,6 +164,9 @@ if {[param_contains $select_type_parameters "CR_ONE_TASK_PER_CORE"]} { if {[param_contains $select_type_parameters "CR_CORE_*"]} { set selectparam 1 } +if {[param_contains [get_config_param "SelectType"] "select/linear"]} { + skip "This test is incompatible with select/linear" +} if {[get_config_param "PriorityType"] eq "priority/multifactor"} { set prio_multifactor 1 From bd7226e67e3219245ddddfc1d45efbfed2150af6 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Tue, 29 Aug 2023 19:03:02 +0200 Subject: [PATCH 62/81] Testsuite - Improve logging messages of test21.3* Bug 15724 --- testsuite/expect/inc21.30.1 | 4 ++-- testsuite/expect/inc21.30.11 | 4 ++-- testsuite/expect/inc21.30.16 | 4 ++-- testsuite/expect/inc21.30.3 | 4 ++-- testsuite/expect/inc21.30.6 | 4 ++-- testsuite/expect/inc21.30.7 | 4 ++-- testsuite/expect/inc21.34.1 | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/testsuite/expect/inc21.30.1 b/testsuite/expect/inc21.30.1 index 4d798c6cc64..52b1fb3da58 100644 --- a/testsuite/expect/inc21.30.1 +++ b/testsuite/expect/inc21.30.1 @@ -30,7 +30,7 @@ proc inc21_30_1 { wait_reason } { global srun salloc acct bin_sleep number grn_num check_reason - log_info "Starting GrpNode limit test" + log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 @@ -117,6 +117,6 @@ proc inc21_30_1 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing GrpNode limit: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.30.11 b/testsuite/expect/inc21.30.11 index 445aa19d998..4af2ce6b88b 100644 --- a/testsuite/expect/inc21.30.11 +++ b/testsuite/expect/inc21.30.11 @@ -35,7 +35,7 @@ proc inc21_30_11 { wait_reason } { return } - log_info "Starting GrpMem test" + log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 @@ -90,6 +90,6 @@ proc inc21_30_11 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing Group Memory: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.30.16 b/testsuite/expect/inc21.30.16 index f15450c86ba..fe02faf21f0 100644 --- a/testsuite/expect/inc21.30.16 +++ b/testsuite/expect/inc21.30.16 @@ -29,7 +29,7 @@ proc inc21_30_16 { wait_reason } { global salloc srun acct number bin_sleep maxnodespu_num - log_info "Starting MaxNodesPerUser test" + log_info "Starting $wait_reason test" set job_id 0 @@ -90,6 +90,6 @@ proc
inc21_30_16 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing MaxNodesPerUser: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.30.3 b/testsuite/expect/inc21.30.3 index 573f199f4ea..b3631db0562 100644 --- a/testsuite/expect/inc21.30.3 +++ b/testsuite/expect/inc21.30.3 @@ -31,7 +31,7 @@ proc inc21_30_3 { wait_reason } { global salloc scontrol srun acct re_word_str bin_sleep jobmatch number grjobs_num - log_info "Starting GrpJob limit test" + log_info "Starting $wait_reason test" array set job_id {} set check_num $grjobs_num @@ -119,6 +119,6 @@ proc inc21_30_3 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing GrpJob limits: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.30.6 b/testsuite/expect/inc21.30.6 index 35b8415ecdb..cb075feb819 100644 --- a/testsuite/expect/inc21.30.6 +++ b/testsuite/expect/inc21.30.6 @@ -30,7 +30,7 @@ proc inc21_30_6 { wait_reason } { global salloc acct number srun job_id1 bin_sleep maxnode_num - log_info "Starting MaxNode limit test" + log_info "Starting $wait_reason test" set job_id1 0 # Raise an error to abort the catch block @@ -86,6 +86,6 @@ proc inc21_30_6 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing MaxNode limit: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.30.7 b/testsuite/expect/inc21.30.7 index 22a2889513b..872c90f90be 100644 --- a/testsuite/expect/inc21.30.7 +++ b/testsuite/expect/inc21.30.7 @@ -30,7 +30,7 @@ proc inc21_30_7 { wait_reason } { global salloc scontrol srun acct re_word_str bin_sleep jobmatch job_id1 job_id2 job_id3 number maxjobs_num - log_info "Starting MaxJob limit test" + log_info "Starting $wait_reason test" set check_num $maxjobs_num array set job_id {} @@ -117,6 +117,6 @@ proc inc21_30_7 { wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing MaxJobs limit: $message" + fail "Failure testing $wait_reason: $message" } } diff --git a/testsuite/expect/inc21.34.1 b/testsuite/expect/inc21.34.1 index 85a40e65cf9..a775d74d14e 100644 --- a/testsuite/expect/inc21.34.1 +++ b/testsuite/expect/inc21.34.1 @@ -31,7 +31,7 @@ proc inc21_34_1 { qostest wait_reason } { global salloc srun nthreads acct bin_sleep grpcpumin_num number global totcpus test_node - log_info "Starting GrpCpuMins test " + log_info "Starting $wait_reason test" if {![param_contains [get_config_param "AccountingStorageEnforce"] "safe"]} { log_warn "This test can't be run without AccountingStorageEnforce having \"safe\" in it" @@ -106,6 +106,6 @@ proc inc21_34_1 { qostest wait_reason } { # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only - fail "Failure testing QOSGrpCPUMinutesLimit: $message" + fail "Failure testing $wait_reason: $message" } } From 7c837c08c8aff66481bc4220b30984c9013cdb4e Mon Sep 17 00:00:00 2001 From: Ethan Simmons Date: Tue, 16 May 2023 09:15:30 -0600 Subject: [PATCH 63/81] Testsuite - Improve test_122_2.py increasing the timeout The previous timeout caused some false failures.
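The rationale behind the timeout bump is the usual polling trade-off: wait_for_job_state polls until the job reaches the requested state or the timeout expires, so a timeout shorter than the worst-case scheduling latency turns slow-but-correct runs into false failures. A minimal sketch of such a poll loop (illustrative only; get_state is an assumed callable, not the actual atf helper):

    import time

    # Illustrative poll loop, not the actual atf implementation.
    def wait_for_state(get_state, want, timeout=15, poll_interval=1):
        """Return True if get_state() reaches `want` within `timeout` seconds."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            if get_state() == want:
                return True
            time.sleep(poll_interval)
        # Timed out: with timeout=5 this fired spuriously on slow systems,
        # which is why the test now allows 15 seconds.
        return False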
Bug 14327 --- testsuite/python/tests/test_122_2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/python/tests/test_122_2.py b/testsuite/python/tests/test_122_2.py index 15ec72e3ea7..51e4fa7e394 100644 --- a/testsuite/python/tests/test_122_2.py +++ b/testsuite/python/tests/test_122_2.py @@ -24,7 +24,7 @@ def test_job_array_with_gres(): --output={output_pattern}") output_file_1 = f"{atf.module_tmp_path}/{job_id}-1.out" output_file_2 = f"{atf.module_tmp_path}/{job_id}-2.out" - atf.wait_for_job_state(job_id, 'DONE', timeout=5, fatal=True) + atf.wait_for_job_state(job_id, 'DONE', timeout=15, fatal=True) with open(output_file_1, 'r') as f: output = f.read() assert 'DONE' in output, 'Expect job to finish' From 450d56e89a23be75e3c1246497ebeeef107085cf Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Thu, 31 Aug 2023 17:39:57 +0200 Subject: [PATCH 64/81] Testsuite - Improve test21.31 using wait_for -subtest The fail message in one of the subtest/fail calls was wrong, but it is better to use wait_for -subtest and forget about the message typo. Bug 17485 --- testsuite/expect/test21.31 | 45 +++++++++----------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/testsuite/expect/test21.31 b/testsuite/expect/test21.31 index ad46e4029f4..d907fec3f77 100755 --- a/testsuite/expect/test21.31 +++ b/testsuite/expect/test21.31 @@ -172,25 +172,11 @@ if {[check_resource_limits $sr1 [array get resource1_chck]] == 1} { # use scontrol to verify the modified cluster license # set matches 0 -eval spawn $scontrol show license $sr1@$resource1(Server) -expect { - -re "LicenseName=$sr1@$resource1(Server)" { - incr matches - exp_continue - } - -re "Total=$ect1a Used=0 Free=$ect1a Reserved=0 Remote=yes" { - incr matches - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -if {$matches != 2} { - fail "License output is incorrect ($matches != 2. Expected: LicenseName=$sr1@$resource1(Server). Expected: Total=$ect1 Used=0 Free=$ect1 Reserved=0 Remote=yes)" +wait_for -subtest {$matches == 2} { + set output [run_command_output -fail "$scontrol show license $sr1@$resource1(Server)"] + set matches 0 + incr matches [regexp "LicenseName=$sr1@$resource1(Server)" $output] + incr matches [regexp "Total=$ect1a Used=0 Free=$ect1a Reserved=0 Remote=yes" $output] } # @@ -203,22 +189,11 @@ if [remove_res $sr1] { # # use scontrol to verify cluster license was removed # -set matches 0 -spawn $scontrol show license -expect { - -re "LicenseName=$sr1@$resource1(Server)" { - incr matches - exp_continue - } - timeout { - fail "scontrol not responding" - } - eof { - wait - } -} -if {$matches != 0} { - fail "scontrol failed to remove license ($sr1@$resource1(Server))" +set matches 1 +wait_for -subtest {$matches == 0} { + set output [run_command_output -fail "$scontrol show license $sr1@$resource1(Server)"] + set matches 0 + incr matches [regexp "LicenseName=$sr1@$resource1(Server)" $output] } # From aecfbefc92d3dab66cea2a2dc390001f0fe8add2 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 1 Sep 2023 10:24:57 -0600 Subject: [PATCH 65/81] Testsuite - Improve test17.60 increasing the deadline timeout The previous timeout was leading to false failures.
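The same retry-and-recount idea underlies the wait_for -subtest conversions in test21.31 above: instead of matching a single spawn/expect read and failing on the first miss, the check reruns scontrol, recounts the regexp matches, and only subfails once the timeout is exhausted. A rough Python rendering of that pattern (the real wait_for is Tcl in testsuite/expect/globals; run is an assumed callable returning command output):

    import re
    import time

    # Assumed-interface sketch of the wait_for -subtest pattern (not atf code).
    def wait_for_matches(run, patterns, expected, timeout=30, poll_interval=2):
        """Rerun run() until the number of matched patterns equals `expected`."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            output = run()  # e.g. the output of "scontrol show license"
            if sum(bool(re.search(p, output)) for p in patterns) == expected:
                return True  # subpass
            time.sleep(poll_interval)
        return False  # subfail, but only after retrying for the full timeout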
Bug 14303 --- testsuite/expect/test17.60 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/expect/test17.60 b/testsuite/expect/test17.60 index c3731964ce0..5de25d8446a 100755 --- a/testsuite/expect/test17.60 +++ b/testsuite/expect/test17.60 @@ -70,7 +70,7 @@ wait_for -timeout 15 {$state == "PENDING" && $reason == "BeginTime"} { } subtest {$state == "PENDING" && $reason == "BeginTime"} "Job should be PENDING with reason BeginTime" "JobState=$state Reason=$reason" -wait_for -timeout 60 {$state == "DEADLINE" && $reason == "DeadLine"} { +wait_for -timeout 120 {$state == "DEADLINE" && $reason == "DeadLine"} { set state [get_job_param $job_id "JobState"] set reason [get_job_param $job_id "Reason"] } From edf8ac2ded467c3175a443b8a91678b2a03863f8 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Mon, 4 Sep 2023 16:49:14 +0200 Subject: [PATCH 66/81] Testsuite - Improve test15.37 increasing the deadline timeout The previous timeout was leading to false failures. This is a continuation/variant of aecfbefc92. A duplicated subtest has also been removed. Bug 14303 --- testsuite/expect/test15.37 | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/testsuite/expect/test15.37 b/testsuite/expect/test15.37 index e4c4ebc9c8b..5bbe1a892af 100755 --- a/testsuite/expect/test15.37 +++ b/testsuite/expect/test15.37 @@ -81,21 +81,12 @@ wait_for -timeout 15 {$state == "PENDING" && $reason == "BeginTime"} { } subtest {$state == "PENDING" && $reason == "BeginTime"} "Job should be PENDING with reason BeginTime" "JobState=$state Reason=$reason" -wait_for -timeout 60 {$state == "DEADLINE" && $reason == "DeadLine"} { +wait_for -timeout 120 {$state == "DEADLINE" && $reason == "DeadLine"} { set state [get_job_param $job_id "JobState"] set reason [get_job_param $job_id "Reason"] } subtest {$state == "DEADLINE" && $reason == "DeadLine"} "Job should end with state DEADLINE and reason DeadLine" "JobState=$state Reason=$reason" -# -# Wait and check that job state reach the deadline -# -set state "PENDING" -wait_for -timeout 60 {$state == "DEADLINE"} { - set state [get_job_param $job_id "JobState"] -} -subtest {$state == "DEADLINE"} "Job should end with state DEADLINE" "$state != DEADLINE" - # # Test that the state is accounted properly # From b335bdf7de750c0688dcdc166ee2c0fdbc90ac65 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Thu, 7 Sep 2023 17:49:03 +0200 Subject: [PATCH 67/81] Testsuite - Improve tests 21.30 and 21.34 using wait_* functions The previous implementation was too optimistic, expecting that jobs not violating limits would never be PD (pending). Other minor and style improvements are included. The temporary debug traces added in bug 12658 to diagnose an equivalent issue are also removed.
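The shape of the rework in the diffs below is the same everywhere: a synchronous salloc that had to print "Granted job allocation" immediately is replaced by an asynchronous submit followed by a wait on the job state or pending reason, since a job within its limits may legitimately pend for a moment. In the Python half of the testsuite the equivalent idiom would look roughly like this (a sketch only; the job parameters are placeholders, and the boolean return of wait_for_job_state is assumed):

    import atf

    # Sketch of the submit-then-wait idiom (placeholder job parameters).
    job_id = atf.submit_job_sbatch("-n1 -t1 --wrap='sleep 300'")
    # Jobs within the limits may pend briefly before running; polling the
    # state tolerates that instead of demanding an immediate allocation.
    assert atf.wait_for_job_state(job_id, 'RUNNING', timeout=60)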
Bug 17640 --- testsuite/expect/inc21.30.1 | 76 ++------------------- testsuite/expect/inc21.30.10 | 52 +++----------- testsuite/expect/inc21.30.11 | 46 ++----------- testsuite/expect/inc21.30.12 | 9 +-- testsuite/expect/inc21.30.13 | 48 ++----------- testsuite/expect/inc21.30.14 | 49 ++----------- testsuite/expect/inc21.30.15 | 55 ++------------- testsuite/expect/inc21.30.16 | 53 ++------------- testsuite/expect/inc21.30.17 | 51 ++------------ testsuite/expect/inc21.30.2 | 55 ++------------- testsuite/expect/inc21.30.3 | 87 ++++-------------------- testsuite/expect/inc21.30.4 | 117 +++++++++----------------------- testsuite/expect/inc21.30.5 | 51 +++----------- testsuite/expect/inc21.30.6 | 49 +++---------- testsuite/expect/inc21.30.7 | 80 +++------------------- testsuite/expect/inc21.30.8 | 112 +++++++++--------------------- testsuite/expect/inc21.30.9 | 53 +++------------ testsuite/expect/inc21.34.1 | 48 ++----------- testsuite/expect/inc21.34_tests | 8 +-- testsuite/expect/test21.30 | 11 +-- 20 files changed, 188 insertions(+), 922 deletions(-) diff --git a/testsuite/expect/inc21.30.1 b/testsuite/expect/inc21.30.1 index 52b1fb3da58..3766d26f2ab 100644 --- a/testsuite/expect/inc21.30.1 +++ b/testsuite/expect/inc21.30.1 @@ -28,80 +28,20 @@ # Test GrpNode limit proc inc21_30_1 { wait_reason } { - global srun salloc acct bin_sleep number grn_num check_reason + global srun acct bin_sleep grn_num log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 - # TODO Temporarily increase logging to debug bug 12658 (remove once fixed) - global bin_echo test_id - if {$test_id eq "21.30"} { - set config_dir [get_conf_path] - set config_file $config_dir/slurm.conf - save_conf $config_file - run_command -none "$bin_echo SlurmctldDebug=debug3 >> $config_file" - run_command -none "$bin_echo DebugFlags=TraceJobs >> $config_file" - reconfigure -fail - } - # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc -N$grn_num --account=$acct --exclusive -t1 $srun $bin_sleep 10 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - - # TODO Temporary debug for bug 12658 (remove once fixed) - global scontrol sacctmgr - run_command "$scontrol show jobs" - run_command "$scontrol show nodes" - run_command "$scontrol show reservations" - run_command "$sacctmgr show assoc" - - error "Job should be running, but is not" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not reponding" - } - eof { - wait - } - } - if {$job_id1 == 0} { - error "Jobs were not submitted" - } + set job_id1 [submit_job -fail "-N$grn_num --account=$acct --exclusive -t1 -o /dev/null --wrap '$srun $bin_sleep 300'"] + subtest {![wait_for_job $job_id1 "RUNNING"]} "Job should run $wait_reason" - spawn $salloc -N1 --account=$acct --exclusive -t1 $srun $bin_sleep 10 - expect { - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. 
This is expected" - } - -re "Granted job allocation ($number)" { - error "This should be pended but is not" - } - timeout { - error "salloc is not reponding" - } - eof { - wait - } - } - if {$job_id2 == 0} { - error "Jobs were not submitted" - } - - subtest [check_job_state $job_id2 PENDING] "Job should be in the PENDING state" - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "-N1 --account=$acct --exclusive -t1 -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have wait reason $wait_reason" } message] ; # Store the error message in $message @@ -109,12 +49,6 @@ proc inc21_30_1 { wait_reason } { cancel_job $job_id1 cancel_job $job_id2 - # TODO Temporarily restore logging to debug bug 12658 (remove once fixed) - if {$test_id eq "21.30"} { - restore_conf $config_file - reconfigure - } - # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only fail "Failure testing $wait_reason: $message" diff --git a/testsuite/expect/inc21.30.10 b/testsuite/expect/inc21.30.10 index 404337e35f5..73cba575428 100644 --- a/testsuite/expect/inc21.30.10 +++ b/testsuite/expect/inc21.30.10 @@ -27,62 +27,30 @@ ############################################################################ proc inc21_30_10 { wait_reason } { - global salloc srun acct bin_sleep grpcpurunmin_num number + global srun acct bin_sleep grpcpurunmin_num - log_info "Starting GrpCpuRunMins test" + log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 # Raise an error to abort the catch block set exception_code [catch { + # There may be some GrpCPUmins data left from previous jobs, so use less than the limit set target_time [expr $grpcpurunmin_num / 4] set time_limit [expr $target_time - 3] if {$time_limit < 1} { - log_warn "Invalid time limit" + subskip "Time limit needs to be higher ($grpcpurunmin_num)" return } - # There may be some GrpCPUmins data left from previous jobs, so use less than the limit - spawn $salloc --account=$acct -n4 --time=$time_limit $srun $bin_sleep 10 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "Job should be running, but is not" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - spawn $salloc --account=$acct -n4 -t[expr $target_time + 1] $srun $bin_sleep 10 - expect { - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. 
This is expected" - } - -re "Granted job allocation ($number)" { - error "Job should be pending, but is not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -n4 --time=$time_limit -o /dev/null --wrap '$srun $bin_sleep 1'"] + subtest {![wait_for_job $job_id1 "DONE"]} "Job should run $wait_reason" + cancel_job $job_id1 - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -n4 -t[expr $target_time + 1] -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + cancel_job $job_id2 } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.11 b/testsuite/expect/inc21.30.11 index 4af2ce6b88b..17d30326bf2 100644 --- a/testsuite/expect/inc21.30.11 +++ b/testsuite/expect/inc21.30.11 @@ -27,11 +27,11 @@ ############################################################################ proc inc21_30_11 { wait_reason } { - global salloc srun acct bin_sleep grpmem_num number + global srun acct bin_sleep grpmem_num global test_select_type_params if {![param_contains [get_config_param "SelectTypeParameters"] "CR_*MEMORY"]} { - log_warn "Memory management not configured" + subskip "This test needs SelectTypeParameters including MEMORY" return } @@ -42,45 +42,11 @@ proc inc21_30_11 { wait_reason } { # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -N1 --mem=$grpmem_num -t1 $srun $bin_sleep 10 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "Job should have run but did not" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -N1 --mem=$grpmem_num -t1 -o /dev/null --wrap '$srun $bin_sleep 120'"] + subtest {![wait_for_job $job_id1 "RUNNING"]} "Job should run $wait_reason" - spawn $salloc --account=$acct -N1 --mem=$grpmem_num -t1 $srun $bin_sleep 10 - expect { - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. 
This is expected" - } - -re "Granted job allocation ($number)" { - error "Job should have pended, but did not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -N1 --mem=$grpmem_num -t1 -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.12 b/testsuite/expect/inc21.30.12 index 6473353b76b..50fb282d603 100644 --- a/testsuite/expect/inc21.30.12 +++ b/testsuite/expect/inc21.30.12 @@ -27,7 +27,7 @@ ############################################################################ proc inc21_30_12 { } { - global salloc srun acct bin_sleep grpwall_num number qostest + global srun acct bin_sleep grpwall_num qostest log_info "Starting Grpwall test" set job_id_list [list] @@ -56,11 +56,8 @@ proc inc21_30_12 { } { } log_debug "Submitting the final job and check that it is set Pending with Reason QOSGrpWallLimit" - set job_id [submit_job -fail "--account=$acct --qos=$qostest -N1 -t$job_time --wrap '$bin_sleep $sleep_time' -o /dev/null -e /dev/null"] - lappend job_id_list $job_id - - # Subtest of the limit - subtest {! [wait_job_reason $job_id PENDING QOSGrpWallLimit]} "Job should achieve PENDING state with reason QOSGrpWallLimit" + lappend job_id_list [submit_job -fail "--account=$acct --qos=$qostest -N1 -t$job_time --wrap '$bin_sleep $sleep_time' -o /dev/null -e /dev/null"] + subtest {! [wait_job_reason [lindex $job_id_list end] "PENDING" "QOSGrpWallLimit"]} "Job should achieve PENDING state with reason QOSGrpWallLimit" # Reset the QoS usage if [reset_qos_usage "" $qostest] { diff --git a/testsuite/expect/inc21.30.13 b/testsuite/expect/inc21.30.13 index 7928fcbfbc7..1aa7856deab 100644 --- a/testsuite/expect/inc21.30.13 +++ b/testsuite/expect/inc21.30.13 @@ -27,56 +27,20 @@ ############################################################################ proc inc21_30_13 { wait_reason } { - global salloc srun acct bin_sleep maxcpumin_num number + global srun acct bin_sleep maxcpumin_num - log_info "Starting MaxCpuMins test" + log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -n$maxcpumin_num -t1 $srun $bin_sleep 10 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "This job should not be pending" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -n$maxcpumin_num -t1 -o /dev/null --wrap '$srun $bin_sleep 120'"] + subtest {![wait_for_job $job_id1 "RUNNING"]} "Job should run $wait_reason" - spawn $salloc --account=$acct -n[expr $maxcpumin_num + 1] -t1 $srun $bin_sleep 10 - expect { - -re " Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. 
This is expected" - - } - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - error "Job ($job_id2) should be pending but is not" - } - timeout { - error "salloc is not responing" - } - eof { - wait - } - } - - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -n[expr $maxcpumin_num + 1] -t1 -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.14 b/testsuite/expect/inc21.30.14 index 3cc1e2da6bd..f47839a1e3c 100644 --- a/testsuite/expect/inc21.30.14 +++ b/testsuite/expect/inc21.30.14 @@ -27,57 +27,20 @@ ############################################################################ proc inc21_30_14 { wait_reason } { - global salloc srun acct bin_sleep number maxwall_num + global srun acct bin_sleep maxwall_num - log_info "Starting MaxWall test" + log_info "Starting $wait_reason test" set job_id 0 # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -N1 -t$maxwall_num $srun $bin_sleep 10 - expect { - -re "Pending job allocation ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - log_debug "Job $job_id has been submitted" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - - # Cancel job + set job_id [submit_job -fail "--account=$acct -N1 -t$maxwall_num -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run $wait_reason" cancel_job $job_id - spawn $salloc --account=$acct -N1 -t[expr $maxwall_num + 1] $srun $bin_sleep 10 - expect { - -re "Pending job allocation ($number)" { - set job_id $expect_out(1,string) - log_debug "Job $job_id is waiting for resources. 
This is expected" - } - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - error "Job ($job_id) should be pending for resources, but is not" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id $wait_reason] "Job should have wait reason $wait_reason" + set job_id [submit_job -fail "--account=$acct -N1 -t[expr $maxwall_num + 1] -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.15 b/testsuite/expect/inc21.30.15 index c6ac61a5ab8..14efbf9fdf6 100644 --- a/testsuite/expect/inc21.30.15 +++ b/testsuite/expect/inc21.30.15 @@ -27,9 +27,9 @@ ############################################################################ proc inc21_30_15 { wait_reason } { - global salloc srun bin_sleep number maxcpuspu_num acct + global srun bin_sleep maxcpuspu_num acct - log_info "Starting MaxCPUsPerUser test" + log_info "Starting $wait_reason test" set job_id1 0 set job_id2 0 @@ -37,54 +37,11 @@ proc inc21_30_15 { wait_reason } { # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -t1 -n$maxcpuspu_num $srun $bin_sleep 20 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "job ($number) queued and waiting for resources" { - set job_id1 $expect_out(1,string) - error "Job ($job_id1) should not be waiting for resources, but is" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -t1 -n$maxcpuspu_num -o /dev/null --wrap '$srun $bin_sleep 120'"] + subtest {![wait_for_job $job_id1 "RUNNING"]} "Job should run $wait_reason" - if { $job_id1 == 0 } { - error "Job was not submitted" - } - - spawn $salloc --account=$acct -t1 -n$maxcpuspu_num $srun $bin_sleep 20 - expect { - -re "job ($number) queued and waiting for resources" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resource. 
This is expected do not worry" - } - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - error "Job ($job_id2) should be pending for resources, but is not" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - - if { $job_id2 == 0 } { - error "Job was not submitted" - } - - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -t1 -n$maxcpuspu_num -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.16 b/testsuite/expect/inc21.30.16 index fe02faf21f0..a148b19de5e 100644 --- a/testsuite/expect/inc21.30.16 +++ b/testsuite/expect/inc21.30.16 @@ -27,7 +27,7 @@ ############################################################################ proc inc21_30_16 { wait_reason } { - global salloc srun acct number bin_sleep maxnodespu_num + global srun acct bin_sleep maxnodespu_num log_info "Starting $wait_reason test" @@ -36,52 +36,13 @@ proc inc21_30_16 { wait_reason } { # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -N$maxnodespu_num -t1 --exclusive $srun $bin_sleep 2 - expect { - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - log_debug "Job $job_id was submitted" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } + set job_id [submit_job -fail "--account=$acct -N$maxnodespu_num -t1 --exclusive -o /dev/null --wrap '$srun $bin_sleep 10'"] + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run $wait_reason" + cancel_job $job_id - if { $job_id == 0 } { - error "Job was not submitted" - } else { - # Cancel job - cancel_job $job_id - } - - spawn $salloc --account=$acct -N[expr $maxnodespu_num + 1] -t1 --exclusive $srun $bin_sleep 2 - expect { - -re "Pending job allocation ($number)" { - set job_id $expect_out(1,string) - log_debug "This error is expected. Do not worry" - } - -re "Granted job allocation" { - error "salloc should not have granted an allocation" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - - if { $job_id == 0 } { - error "Job was not submitted" - } - - subtest [check_reason $job_id $wait_reason] "Job should have wait reason $wait_reason" + set job_id [submit_job -fail "--account=$acct -N[expr $maxnodespu_num + 1] -t1 --exclusive -o /dev/null --wrap '$srun $bin_sleep 5'"] + subtest {![wait_job_reason $job_id "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + cancel_job $job_id } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.17 b/testsuite/expect/inc21.30.17 index bd107b56955..657cac0b47b 100644 --- a/testsuite/expect/inc21.30.17 +++ b/testsuite/expect/inc21.30.17 @@ -27,37 +27,14 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
############################################################################ -# Checks the timelimit of the job -proc check_timelimit { job } { - - global scontrol job_id maxwall_num - - set timelimit_match 0 - spawn $scontrol show job $job - expect { - -re "TimeLimit=00:0$maxwall_num:00" { - incr timelimit_match - } - timeout { - error "scontrol not responding" - } - eof { - wait - } - } - - subtest {$timelimit_match == 1} "Job should have as TimeLimit the QOS MaxWall" -} - proc inc21_30_17 { } { - global salloc srun acct bin_sleep number maxwall_num skips + global srun acct bin_sleep maxwall_num # If the partition has DefaultTime set it will mess up this test. # Since there is currently no way to use scontrol to change default # time to "none" just skip this subtest. if { [get_partition_default_time_limit ""] != -1 } { - log_warn "SKIP: Default partition has a default time limit set, cannot run inc21.30.17" - incr skips + subskip "This test needs the default partition without a DefaultTime" return } @@ -67,27 +44,9 @@ proc inc21_30_17 { } { # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -N1 $srun $bin_sleep 10 - expect { - -re "Granted job allocation ($number)" { - set job_id $expect_out(1,string) - log_debug "Job $job_id has been submitted" - check_timelimit $job_id - } - -re "Pending job allocation ($number)" { - set job_id $expect_out(1,string) - error "job $job_id should not be waiting for resources" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } + set job_id [submit_job -fail "--account=$acct -N1 -o /dev/null --wrap '$srun $bin_sleep 120'"] + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run Qos/Parent MaxWall" + subtest {[get_job_param $job_id "TimeLimit"] == "00:0$maxwall_num:00"} "Job should have TimeLimit=00:0$maxwall_num:00" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.2 b/testsuite/expect/inc21.30.2 index d4f8d2aa091..e76c97ddab8 100644 --- a/testsuite/expect/inc21.30.2 +++ b/testsuite/expect/inc21.30.2 @@ -28,65 +28,22 @@ # Test GrpCpus proc inc21_30_2 { wait_reason } { - global srun salloc acct bin_sleep jobmatch job_id1 job_id2 number + global srun acct bin_sleep global grcpu_num test_node set job_id1 0 set job_id2 0 - log_info "Starting GrpCPUs limit test" + log_info "Starting $wait_reason limit test" # Raise an error to abort the catch block set exception_code [catch { - set jobmatch 0 - spawn $salloc --account=$acct -w$test_node -t1 -n$grcpu_num $srun $bin_sleep 5 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - incr jobmatch - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "Job should be running, but is not. 
If you have CR_CORE_* and have ThreadsPerCore > 1 this could happen" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not reponding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -w$test_node -t1 -n$grcpu_num -o none --wrap '$srun $bin_sleep 300'"] + subtest {![wait_for_job $job_id1 "RUNNING"]} "Job should run $wait_reason" "If you have CR_CORE_* and have ThreadsPerCore > 1 this could happen" - spawn $salloc --account=$acct -w$test_node -t1 -n[expr $grcpu_num + 1] $srun $bin_sleep 5 - expect { - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - error "Job should be pending, but is not" - } - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. This is expected" - incr jobmatch - } - timeout { - error "salloc is not reponding If you have CR_CORE_* and have ThreadsPerCore > 1 this could happen." - } - eof { - wait - } - } - if {$jobmatch != 2} { - error "jobs were not submitted" - } - - # Checks the state of the job - subtest [check_job_state $job_id2 PENDING] "Job should be in the PENDING state" - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -w$test_node -t1 -n[expr $grcpu_num + 1] -o none --wrap '$srun $bin_sleep 5'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message diff --git a/testsuite/expect/inc21.30.3 b/testsuite/expect/inc21.30.3 index b3631db0562..c26aea62630 100644 --- a/testsuite/expect/inc21.30.3 +++ b/testsuite/expect/inc21.30.3 @@ -28,94 +28,33 @@ # Test GrpJob limits proc inc21_30_3 { wait_reason } { - - global salloc scontrol srun acct re_word_str bin_sleep jobmatch number grjobs_num + global srun acct bin_sleep grjobs_num log_info "Starting $wait_reason test" - array set job_id {} + set job_ids [list] set check_num $grjobs_num # Raise an error to abort the catch block set exception_code [catch { - set jobmatch 0 - for {set inx 0} {$inx <= $check_num} {incr inx} { - set test_reason 0 - spawn $salloc --account=$acct -n1 -t1 $srun $bin_sleep 60 - expect { - -re "Granted job allocation ($number)" { - set job_id($inx) $expect_out(1,string) - if {$inx < $check_num} { - log_debug "Job $inx $job_id($inx) was submitted" - incr jobmatch - } else { - error "Job $inx ($job_id($inx)) wasn't suppose to work but it did, limit was $check_num" - } - } - -re "job ($number) queued and waiting for resources" { - set job_id($inx) $expect_out(1,string) - if {$inx >= $check_num} { - log_debug "Job $inx $job_id($inx) is waiting for resources. 
This is expected" - incr jobmatch - } else { - set test_reason 1 - } - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - -# Job is waiting, check if the reason is the limit we are testing for - if {$test_reason != 0 && $job_id($inx) != 0} { - spawn $scontrol show job $job_id($inx) - expect { - -re "Reason=($re_word_str)" { - if {$expect_out(1,string) ne $wait_reason} { - error "Job $inx ($job_id($inx)) was suppose to work but it didn't, limit was $check_num" - } else { - incr jobmatch - } - exp_continue - } - timeout { - error "scontrol is not responding" - } - eof { - wait - } - } - } - } - - set target_jobs [expr $check_num + 1] - if {$jobmatch != $target_jobs} { - error "Not all jobs were not submitted ($jobmatch != $target_jobs)" + for {set inx 0} {$inx < $check_num} {incr inx} { + lappend job_ids [submit_job -fail "--account=$acct -n1 -t1 -o none --wrap '$srun $bin_sleep 300'"] } - # Checks the job state - if [info exists job_id($check_num)] { - subtest [check_job_state $job_id($check_num) PENDING] "Job should be in the PENDING state" - } else { - error "Didn't attempt to start enough jobs" + foreach job_id $job_ids { + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run $wait_reason" } - } message] ; # Store the error message in $message + #AGIL: + run_command "squeue" + lappend job_ids [submit_job -fail "--account=$acct -n1 -t1 -o none --wrap '$srun $bin_sleep 300'"] + subtest {![wait_job_reason [lindex $job_ids end] "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + + } message] ; # Store the error message in $message # Perform local cleanup - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # Cancels the jobs - cancel_job $job_id($inx) - } - } + cancel_job $job_ids # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.4 b/testsuite/expect/inc21.30.4 index 1fb947f2676..23a4b97da35 100644 --- a/testsuite/expect/inc21.30.4 +++ b/testsuite/expect/inc21.30.4 @@ -29,111 +29,60 @@ # Test GrpSubmit proc inc21_30_4 {} { - global salloc sbatch srun acct bin_rm bin_sleep jobmatch - global number grsub_num + global sbatch srun acct bin_sleep + global grsub_num log_info "Starting GrpSubmit limit test" set check_num $grsub_num - set file_in "in21.30.4_script" set tmp_job_id 0 - array set job_id {} - - make_bash_script $file_in "sleep 5" + set job_ids [list] # Raise an error to abort the catch block set exception_code [catch { - set jobmatch 0 - for {set inx 0} {$inx <= $check_num} {incr inx} { - spawn $salloc --account=$acct -t1 -n1 $srun $bin_sleep 60 - expect { - -re "Granted job allocation ($number)" { - set job_id($inx) $expect_out(1,string) - if {$inx < $check_num} { - log_debug "Job $inx $job_id($inx) was submitted" - incr jobmatch - } else { - error "Job $inx ($job_id($inx)) wasn't suppose to work but it did, limit was $check_num" - } - } - -re "error" { - if {$inx >= $check_num} { - log_debug "Job $inx didn't get submitted. 
This is expected" - incr jobmatch - } else { - error "Job $inx ($job_id($inx)) was suppose to work but it didn't, limit was $check_num" - } - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } + for {set inx 0} {$inx < $check_num} {incr inx} { + lappend job_ids [submit_job -fail "--account=$acct -t1 -n1 -o/dev/null --wrap '$srun $bin_sleep 300'"] } - set target_jobs [expr $check_num + 1] - if {$jobmatch != $target_jobs} { - error "Not all jobs were not submitted ($jobmatch != $target_jobs)" + foreach job_id $job_ids { + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run GrpSubmit" } - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # Cancels the jobs - cancel_job $job_id($inx) - } - } + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -t1 -n1 -o/dev/null --wrap '$bin_sleep 1'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + + lappend job_ids $tmp_job_id + cancel_job $job_ids # Test limit with job array + set tmp_job_id 0 + set job_ids [list] + set upper_lim [expr $check_num - 1] - spawn $sbatch --account=$acct -t1 -a0-$upper_lim -o/dev/null $file_in - expect { - -re "Submitted batch job ($number)" { - set tmp_job_id $expect_out(1,string) - exp_continue - } - timeout { - error "sbatch is not responding" - } - eof { - wait - } - } - if {$tmp_job_id == 0} { - error "Job was not submitted when it should have" - } + lappend job_ids [submit_job "--account=$acct -t1 -a0-$upper_lim -o/dev/null --wrap '$bin_sleep 120'"] + subtest {![wait_for_job [lindex $job_ids end] "RUNNING"]} "Job should run GrpSubmit" # Add one more job, and check for error message - set match 0 - spawn $sbatch --account=$acct -t1 -a0-$upper_lim -o/dev/null $file_in - expect { - -re "Job violates accounting/QOS policy" { - set match 1 - exp_continue - } - timeout { - error "sbatch is not responding" - } - eof { - wait - } - } - if {$match == 0} { - error "Job should not have been submitted" - } + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -n1 -t1 -o/dev/null --wrap '$bin_sleep 1'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + + lappend job_ids $tmp_job_id + cancel_job $job_ids + + # Test array too big + set tmp_job_id 0 + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -t1 -a0-$check_num -o/dev/null --wrap '$bin_sleep 1'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job array should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + cancel_job $tmp_job_id } message] ; # Store the error message in $message # Perform local cleanup - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # Cancels the jobs - cancel_job $job_id($inx) - } - } - cancel_job $tmp_job_id - exec $bin_rm -f $file_in + lappend job_ids $tmp_job_id + cancel_job $job_ids # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.5 b/testsuite/expect/inc21.30.5 index f500b5f4fcd..b6914c176e6 100644 --- a/testsuite/expect/inc21.30.5 +++ b/testsuite/expect/inc21.30.5 @@ 
-29,62 +29,29 @@ # Test MaxCpus limits proc inc21_30_5 { wait_reason } { - global salloc acct number srun bin_sleep maxcpu_num + global acct srun bin_sleep maxcpu_num global test_node - log_info "Starting MaxCPUs limit test" + log_info "Starting $wait_reason limit test" set job_id1 0 + set job_id2 0 # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -t1 -w$test_node -n$maxcpu_num $srun $bin_sleep 2 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "This should have run but did not" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - # Cancels remaining jobs + set job_id1 [submit_job -fail "--account=$acct -t1 -w$test_node -n$maxcpu_num -o /dev/null --wrap '$srun $bin_sleep 1'"] + subtest {![wait_for_job $job_id1 "DONE"]} "Job should run $wait_reason" cancel_job $job_id1 - spawn $salloc --account=$acct -t1 -w$test_node -n[expr $maxcpu_num + 1] $srun $bin_sleep 2 - expect { - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 is waiting for resources. This is expected" - } - -re "Granted job allocation ($number)" { - error "This should have pended but did not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id1 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -t1 -w$test_node -n[expr $maxcpu_num + 1] -o /dev/null --wrap '$srun $bin_sleep 1'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + cancel_job $job_id2 } message] ; # Store the error message in $message # Cancels remaining jobs cancel_job $job_id1 + cancel_job $job_id2 # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.6 b/testsuite/expect/inc21.30.6 index cb075feb819..534bf33a9c2 100644 --- a/testsuite/expect/inc21.30.6 +++ b/testsuite/expect/inc21.30.6 @@ -28,61 +28,28 @@ # Test MaxNode limit proc inc21_30_6 { wait_reason } { - global salloc acct number srun job_id1 bin_sleep maxnode_num + global acct srun bin_sleep maxnode_num log_info "Starting $wait_reason test" set job_id1 0 + set job_id2 0 # Raise an error to abort the catch block set exception_code [catch { - spawn $salloc --account=$acct -N$maxnode_num --exclusive -t1 $srun $bin_sleep 2 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "This should have run but did not" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - # Cancels remaining jobs + set job_id1 [submit_job -fail "--account=$acct -N$maxnode_num --exclusive -t1 -o none --wrap '$srun $bin_sleep 1'"] + subtest {![wait_for_job $job_id1 "DONE"]} "Job should run $wait_reason" cancel_job $job_id1 - spawn $salloc --account=$acct -N[expr $maxnode_num + 1] --exclusive -t1 $srun $bin_sleep 2 - expect { - -re "job ($number) queued and waiting for resources" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 is waiting 
for resources. This is expected" - } - -re "Granted job allocation ($number)" { - error "This should have pended but did not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id1 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -N[expr $maxnode_num + 1] --exclusive -t1 -o none --wrap '$srun $bin_sleep 1'"] + subtest {![wait_job_reason -timeout 30 $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + cancel_job $job_id2 } message] ; # Store the error message in $message # Cancels remaining jobs cancel_job $job_id1 + cancel_job $job_id2 # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.7 b/testsuite/expect/inc21.30.7 index 872c90f90be..5e80d9bc1d9 100644 --- a/testsuite/expect/inc21.30.7 +++ b/testsuite/expect/inc21.30.7 @@ -28,92 +28,30 @@ # Test MaxJobs limit proc inc21_30_7 { wait_reason } { - global salloc scontrol srun acct re_word_str bin_sleep jobmatch job_id1 job_id2 job_id3 number maxjobs_num + global srun acct bin_sleep maxjobs_num log_info "Starting $wait_reason test" set check_num $maxjobs_num - array set job_id {} + set job_ids [list] # Raise an error to abort the catch block set exception_code [catch { - set jobmatch 0 - for {set inx 0} {$inx <= $check_num} {incr inx} { - set test_reason 0 - spawn $salloc --account=$acct -t1 -n1 $srun $bin_sleep 60 - expect { - -re "Granted job allocation ($number)" { - set job_id($inx) $expect_out(1,string) - if {$inx < $check_num} { - log_debug "Job $inx $job_id($inx) was submitted. (inc21.30.7)" - incr jobmatch - } else { - error "Job $inx ($job_id($inx)) wasn't suppose to work but it did, limit was $check_num" - } - } - -re "job ($number) queued and waiting for resources" { - set job_id($inx) $expect_out(1,string) - if {$inx >= $check_num} { - log_debug "Job $inx $job_id($inx) is waiting for resources. 
This is expected" - incr jobmatch - } else { - set test_reason 1 - } - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } - - # Job is waiting, check if the reason is the limit we are testing for - if {$test_reason != 0 && $job_id($inx) != 0} { - spawn $scontrol show job $job_id($inx) - expect { - -re "Reason=($re_word_str)" { - if {$expect_out(1,string) ne $wait_reason} { - error "Job $inx ($job_id($inx)) was suppose to start but it didn't, limit was $check_num" - } else { - incr jobmatch - } - exp_continue - } - timeout { - error "scontrol is not responding" - } - eof { - wait - } - } - } + for {set inx 0} {$inx < $check_num} {incr inx} { + lappend job_ids [submit_job -fail "--account=$acct -t1 -n1 -o none --wrap '$srun $bin_sleep 300'"] } - set target_jobs [expr $check_num + 1] - if {$jobmatch != $target_jobs} { - error "Not all jobs were not submitted ($jobmatch != $target_jobs)" + foreach job_id $job_ids { + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run $wait_reason" } - # Checks the job state - if [info exists job_id($check_num)] { - subtest [check_job_state $job_id($check_num) PENDING] "Job should be in the PENDING state" - } else { - error "Didn't attempt to start enough jobs" - } + lappend job_ids [submit_job -fail "--account=$acct -t1 -n1 -o none --wrap '$srun $bin_sleep 300'"] + subtest {![wait_job_reason [lindex $job_ids end] "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" } message] ; # Store the error message in $message # Perform local cleanup - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # Cancels the jobs - cancel_job $job_id($inx) - } - } + cancel_job $job_ids # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.8 b/testsuite/expect/inc21.30.8 index da6234a815f..1f87836874c 100644 --- a/testsuite/expect/inc21.30.8 +++ b/testsuite/expect/inc21.30.8 @@ -28,105 +28,59 @@ # Test MaxJobsSubmits limit proc inc21_30_8 {} { - global sbatch salloc srun acct bin_rm bin_sleep jobmatch - global number maxjobsub_num + global sbatch srun acct bin_sleep maxjobsub_num log_info "Starting MaxJobsSubmits limit test" set check_num $maxjobsub_num - set file_in "in21.30.8_script" set tmp_job_id 0 - array set job_id {} - - make_bash_script $file_in "sleep 5" + set job_ids [list] # Raise an error to abort the catch block set exception_code [catch { - set jobmatch 0 - for {set inx 0} {$inx <= $check_num} {incr inx} { - spawn $salloc --account=$acct -t1 -n1 $srun $bin_sleep 100 - expect { - -re "Granted job allocation ($number)" { - set job_id($inx) $expect_out(1,string) - if {$inx < $check_num} { - log_debug "Job $inx $job_id($inx) was submitted" - incr jobmatch - } else { - error "Job $inx ($job_id($inx)) wasn't suppose to work but it did, limit was $check_num" - } - } - -re "error" { - if {$inx >= $check_num} { - log_debug "Job $inx didn't get submitted. This is expected" - incr jobmatch - } else { - error "Job $inx was suppose to work but it didn't, limit was $check_num?" 
- } - } - timeout { - error "salloc is not responding" - } - eof { - wait - } - } + for {set inx 0} {$inx < $check_num} {incr inx} { + lappend job_ids [submit_job -fail "--account=$acct -t1 -n1 -o/dev/null --wrap '$srun $bin_sleep 300'"] } - set target_jobs [expr $check_num + 1] - subtest {$jobmatch == $target_jobs} "All $target_jobs jobs should have been submitted" "Only $jobmatch jobs were submitted" - - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # Cancels the jobs - cancel_job $job_id($inx) - } + foreach job_id $job_ids { + subtest {![wait_for_job $job_id "RUNNING"]} "Job should run MaxJobsSubmits" } + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -t1 -n1 -o/dev/null --wrap '$bin_sleep 300'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + + lappend job_ids $tmp_job_id + cancel_job $job_ids + # Test limit with job array + set tmp_job_id 0 + set job_ids [list] + set upper_lim [expr $check_num - 1] - spawn $sbatch --account=$acct -t1 -a0-$upper_lim -o/dev/null $file_in - expect { - -re "Submitted batch job ($number)" { - set tmp_job_id $expect_out(1,string) - exp_continue - } - timeout { - error "sbatch is not responding" - } - eof { - wait - } - } - subtest {$tmp_job_id != 0} "All job array elements should have been submitted" + lappend job_ids [submit_job "--account=$acct -t1 -a0-$upper_lim -o/dev/null --wrap '$bin_sleep 120'"] + subtest {[lindex $job_ids end]} "Job array should be submitted" # Add one more job, and check for error message - set match 0 - spawn $sbatch --account=$acct -t1 -a0-$upper_lim -o/dev/null $file_in - expect { - -re "Job violates accounting/QOS policy" { - set match 1 - exp_continue - } - timeout { - error "sbatch is not responding" - } - eof { - wait - } - } - subtest {$match != 0} "Additional job should not have been submitted" + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -n1 -t1 -o/dev/null --wrap '$bin_sleep 1'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + + lappend job_ids $tmp_job_id + cancel_job $job_ids + + # Test array too big + set tmp_job_id 0 + set output [run_command_output -xfail -subtest "$sbatch --account=$acct -t1 -a0-$check_num -o/dev/null --wrap '$bin_sleep 1'"] + subtest {![regexp {Submitted \S+ job (\d+)} $output - tmp_job_id]} "Job array should NOT be submitted" + subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job violates policy message should be printed" + cancel_job $tmp_job_id } message] ; # Store the error message in $message # Perform local cleanup - for {set inx 0} {$inx <= $check_num} {incr inx} { - if [info exists job_id($inx)] { - # cancels the jobs - cancel_job $job_id($inx) - } - } - exec $bin_rm -f $file_in - cancel_job $tmp_job_id + lappend job_ids $tmp_job_id + cancel_job $job_ids # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.30.9 b/testsuite/expect/inc21.30.9 index e11537b9d42..568b96c7958 100644 --- a/testsuite/expect/inc21.30.9 +++ b/testsuite/expect/inc21.30.9 @@ -28,13 +28,13 @@ # Test GrpCPUmins proc inc21_30_9 { wait_reason } { - global salloc srun nthreads acct bin_sleep grpcpumin_num 
number - global totcpus test_node qostest + global srun nthreads acct bin_sleep grpcpumin_num + global test_node qostest - log_info "Starting GrpCpuMins test " + log_info "Starting $wait_reason test" if {![param_contains [get_config_param "AccountingStorageEnforce"] "safe"]} { - log_warn "This test can't be run without AccountingStorageEnforce having \"safe\" in it" + subskip "This subtest needs AccountingStorageEnforce=safe" return } @@ -52,55 +52,20 @@ proc inc21_30_9 { wait_reason } { error "Unable to reset QOS usage" } - spawn $salloc --account=$acct -w$test_node --qos=$qostest -n$nthreads -t$timelimit $srun $bin_sleep 1 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "This job should not be pending" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } + set job_id1 [submit_job -fail "--account=$acct -w$test_node --qos=$qostest -n$nthreads -t$timelimit -o none --wrap '$srun $bin_sleep 1'"] + subtest {![wait_for_job $job_id1 "DONE"]} "Job should run $wait_reason" cancel_job $job_id1 if [reset_qos_usage "" $qostest] { error "Unable to reset QOS usage" } - spawn $salloc --account=$acct -w$test_node --qos=$qostest -n$nthreads -t[expr $timelimit + 1] $srun $bin_sleep 1 - expect { - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. This is expected" - } - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - error "Job should be pending but is not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -w$test_node --qos=$qostest -n$nthreads -t[expr $timelimit + 1] -o none --wrap '$srun $bin_sleep 1'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have pending reason $wait_reason" + cancel_job $job_id2 } message] ; # Store the error message in $message - cancel_job $job_id2 # Convert any errors into failures (after cleaning up) if {$exception_code == 1} { ; # errors only diff --git a/testsuite/expect/inc21.34.1 b/testsuite/expect/inc21.34.1 index a775d74d14e..27505c18493 100644 --- a/testsuite/expect/inc21.34.1 +++ b/testsuite/expect/inc21.34.1 @@ -34,7 +34,7 @@ proc inc21_34_1 { qostest wait_reason } { log_info "Starting $wait_reason test" if {![param_contains [get_config_param "AccountingStorageEnforce"] "safe"]} { - log_warn "This test can't be run without AccountingStorageEnforce having \"safe\" in it" + subskip "This test needs AccountingStorageEnforce=safe" return } @@ -52,56 +52,22 @@ proc inc21_34_1 { qostest wait_reason } { error "Unable to reset QOS usage" } - spawn $salloc --account=$acct -w$test_node -n$nthreads -t$timelimit $srun $bin_sleep 1 - expect { - -re "Granted job allocation ($number)" { - set job_id1 $expect_out(1,string) - log_debug "Job $job_id1 has been submitted" - } - -re "Pending job allocation ($number)" { - set job_id1 $expect_out(1,string) - error "This job should not be pending" - } - -re "error" { - error "Job allocation should not have failed" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - + set job_id1 [submit_job -fail "--account=$acct -w$test_node -n$nthreads -t$timelimit 
-o none --wrap '$srun $bin_sleep 1'"] + subtest {![wait_for_job $job_id1 "DONE"]} "Job should run $wait_reason" cancel_job $job_id1 if [reset_qos_usage "" $qostest] { error "Unable to reset QOS usage" } - spawn $salloc --account=$acct -w$test_node -n$nthreads -t[expr $timelimit + 1] $srun $bin_sleep 1 - expect { - -re "Pending job allocation ($number)" { - set job_id2 $expect_out(1,string) - log_debug "Job $job_id2 is waiting for resources. This is expected" - } - -re "Granted job allocation ($number)" { - set job_id2 $expect_out(1,string) - error "Job should be pending but is not" - } - timeout { - error "salloc not responding" - } - eof { - wait - } - } - - subtest [check_reason $job_id2 $wait_reason] "Job should have wait reason $wait_reason" + set job_id2 [submit_job -fail "--account=$acct -w$test_node -n$nthreads -t[expr $timelimit + 1] -o none --wrap '$srun $bin_sleep 11'"] + subtest {![wait_job_reason $job_id2 "PENDING" $wait_reason]} "Job should have wait reason $wait_reason" + cancel_job $job_id2 } message] ; # Store the error message in $message # Perform local cleanup + cancel_job $job_id1 cancel_job $job_id2 # Convert any errors into failures (after cleaning up) diff --git a/testsuite/expect/inc21.34_tests b/testsuite/expect/inc21.34_tests index 887eac46310..0fc0f2215cd 100644 --- a/testsuite/expect/inc21.34_tests +++ b/testsuite/expect/inc21.34_tests @@ -131,7 +131,7 @@ proc part_test { } { mod_qos $part_qos [array get mod_part_qos] mod_qos $job_qos [array get mod_job_qos] sleep $time_spacing - inc21_30_6 "QOSMaxNodePerJobLimit|PartitionConfig" + inc21_30_6 [list "QOSMaxNodePerJobLimit" "PartitionConfig"] set mod_job_qos(MaxNodes) "-1" set mod_part_qos(MaxNodes) "-1" @@ -262,7 +262,7 @@ proc part_test { } { mod_qos $part_qos [array get mod_part_qos] mod_qos $job_qos [array get mod_job_qos] sleep $time_spacing - inc21_30_16 "QOSMaxNodePerUserLimit|PartitionConfig" + inc21_30_16 [list "QOSMaxNodePerUserLimit" "PartitionConfig"] set mod_job_qos(MaxNodesPerUser) "-1" set mod_part_qos(MaxNodesPerUser) "-1" @@ -454,7 +454,7 @@ proc qos_test { } { mod_qos $job_qos [array get mod_job_qos] mod_qos $part_qos [array get mod_part_qos] sleep $time_spacing - inc21_30_6 "QOSMaxNodePerJobLimit|PartitionConfig" + inc21_30_6 [list "QOSMaxNodePerJobLimit" "PartitionConfig"] set mod_job_qos(MaxNodes) "-1" set mod_part_qos(MaxNodes) "-1" @@ -586,7 +586,7 @@ proc qos_test { } { mod_qos $job_qos [array get mod_job_qos] mod_qos $part_qos [array get mod_part_qos] sleep $time_spacing - inc21_30_16 "QOSMaxNodePerUserLimit|PartitionConfig" + inc21_30_16 [list "QOSMaxNodePerUserLimit" "PartitionConfig"] set mod_job_qos(MaxNodesPerUser) "-1" set mod_part_qos(MaxNodesPerUser) "-1" diff --git a/testsuite/expect/test21.30 b/testsuite/expect/test21.30 index 80bbf6036ca..190743dbd59 100755 --- a/testsuite/expect/test21.30 +++ b/testsuite/expect/test21.30 @@ -80,7 +80,6 @@ if {[check_config_select "linear"] || [default_part_exclusive]} { skip "This test is incompatible with exclusive node allocations" } -set skips 0 set test_node "" # Total cpus in test node set totcpus 0 @@ -245,7 +244,7 @@ if {$num_nodes == 0} { proc cleanup {} { - global sacctmgr qostest acct test_id skips + global sacctmgr qostest acct test_id global scontrol save_billing_weights partition # delete qos @@ -368,7 +367,7 @@ set mod_qos_vals(MaxCpus) "-1" set mod_qos_vals(MaxNodes) $maxnode_num mod_qos $qostest [array get mod_qos_vals] sleep $time_spacing -inc21_30_6 "QOSMaxNodePerJobLimit|PartitionConfig" +inc21_30_6 [list 
"QOSMaxNodePerJobLimit" "PartitionConfig"] set mod_qos_vals(MaxNodes) "-1" # @@ -482,7 +481,7 @@ set mod_qos_vals(MaxCpusPerUser) "-1" set mod_qos_vals(MaxNodesPerUser) $maxnodespu_num mod_qos $qostest [array get mod_qos_vals] sleep $time_spacing -inc21_30_16 "QOSMaxNodePerUserLimit|PartitionConfig" +inc21_30_16 [list "QOSMaxNodePerUserLimit" "PartitionConfig"] set mod_qos_vals(MaxNodesPerUser) "-1" # @@ -598,7 +597,3 @@ if { $one_task_pc } { } inc21_30_15 "QOSMaxBillingPerUser" set mod_qos_vals(MaxTRESPerUser=billing) "-1" - -if {$skips != 0} { - skip "Test was partially skipped (skips = $skips)" -} From 8fc78935462bc169c58d8abf6862aea3b7bdee44 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 8 Sep 2023 18:28:07 +0200 Subject: [PATCH 68/81] Testsuite - Improve test12.3 avoiding false failures Sometimes the new account/association created in slurmdbd was not send to slurmctld when jobs trying to use it were submitted, and therefore they failed. Use scontrol to wait until assocs are ready. Bug 17660 --- testsuite/expect/test12.3 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/testsuite/expect/test12.3 b/testsuite/expect/test12.3 index f76963048a2..e144e0d3f43 100755 --- a/testsuite/expect/test12.3 +++ b/testsuite/expect/test12.3 @@ -128,6 +128,9 @@ expect { } } +# Wait for assocs being available +wait_for_command -pollinterval 0.1 -fail "$scontrol show assoc_mgr accounts=$test_acct users=$user_name | $bin_grep -i 'Account=$test_acct UserName=$user_name'" + # # Submit two slurm jobs to capture job info # From 00d6393c58a6b4936d9a70df1d979be9e26323e9 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Fri, 8 Sep 2023 18:54:23 +0200 Subject: [PATCH 69/81] Testsuite - Improve test12.3 simplifying it using better patterns Bug 17660 --- testsuite/expect/test12.3 | 128 ++++++-------------------------------- 1 file changed, 18 insertions(+), 110 deletions(-) diff --git a/testsuite/expect/test12.3 b/testsuite/expect/test12.3 index e144e0d3f43..bef14e1731a 100755 --- a/testsuite/expect/test12.3 +++ b/testsuite/expect/test12.3 @@ -39,6 +39,7 @@ set job_name_1 "JOB1" set job_name_2 "JOB2" set test_acct "${test_name}-account.1" set step_id 0 +set user_name [get_my_user_name] if {[get_config_param "FrontendName"] ne "MISSING"} { skip "This test can't be run on a front end system" @@ -65,113 +66,29 @@ if {[llength $nodes] != 3} { lassign $nodes node1 node2 node3 proc cleanup {} { - global sacctmgr test_acct + global sacctmgr test_acct user_name + run_command "$sacctmgr -i delete user $user_name account=$test_acct" run_command "$sacctmgr -i delete account $test_acct" } # -# Identify the user and his current default account -# -set acct_name "" -set user_name [get_my_user_name] -spawn $sacctmgr show user $user_name -expect { - -re "$user_name *($re_word_str)" { - set acct_name $expect_out(1,string) - exp_continue - } - timeout { - fail "sacctmgr add not responding" - } - eof { - wait - } -} - +# Add a test account # -# Use sacctmgr to add a test account -# -set aamatches 0 -spawn $sacctmgr -i add account $test_acct -expect { - -re "Adding Account" { - incr aamatches - exp_continue - } - -re "Nothing new added" { - log_warn "Vestigial account $test_acct found" - incr aamatches - exp_continue - } - timeout { - fail "sacctmgr add not responding" - } - eof { - wait - } -} -if {$aamatches != 1} { - fail "sacctmgr had a problem adding account ($aamatches != 1)" -} - -# -# Add self to this new account -# -spawn $sacctmgr -i create user name=$user_name account=$test_acct -expect { - timeout { - fail "sacctmgr 
add not responding" - } - eof { - wait - } -} +run_command -fail "$sacctmgr -i add account $test_acct" +run_command -fail "$sacctmgr -i create user name=$user_name account=$test_acct" # Wait for assocs being available wait_for_command -pollinterval 0.1 -fail "$scontrol show assoc_mgr accounts=$test_acct users=$user_name | $bin_grep -i 'Account=$test_acct UserName=$user_name'" # -# Submit two slurm jobs to capture job info -# -set job_1_node_inx 0 -set timeout $max_job_delay -spawn $srun -N2 -w$node1,$node2 -t1 -A $test_acct -J $job_name_1 -v $bin_id -expect { - -re "launching StepId=($number)\\.$re_word_str on host ($re_word_str)," { - set job_id_1 $expect_out(1,string) - incr job_1_node_inx - exp_continue - } - timeout { - fail "srun not responding" - } - eof { - wait - } -} -subtest {$job_1_node_inx == 2} "Verify host count for job 1" "srun host count bad ($job_1_node_inx != 2)" - -# -# Second job includes one node from the first job and another different node +# Submit two jobs with different nodes and names # -set job_2_node_inx 0 -spawn $srun -N2 -w$node2,$node3 -t1 -A $test_acct -J $job_name_2 -v $bin_id -expect { +set job_id_1 [submit_job -fail "-N2 -w$node1,$node2 -t1 -A $test_acct -J $job_name_1 -o none --wrap '$bin_id'"] +set job_id_2 [submit_job -fail "-N2 -w$node2,$node3 -t1 -A $test_acct -J $job_name_2 -o none --wrap '$bin_id'"] - -re "launching StepId=($number)\\.$re_word_str on host ($re_word_str)," { - set job_id_2 $expect_out(1,string) - incr job_2_node_inx - exp_continue - } - timeout { - fail "srun not responding" - } - eof { - wait - } -} -subtest {$job_2_node_inx == 2} "Verify host count for job 2" "srun host count bad ($job_2_node_inx != 2)" +wait_for_job -fail $job_id_1 "DONE" +wait_for_job -fail $job_id_2 "DONE" # # Wait for accounting data to be propagated to slurmdbd @@ -181,20 +98,11 @@ subtest {$job_2_node_inx == 2} "Verify host count for job 2" "srun host count ba # work properly because they need that to search for active node indexes # along with the events table. 
#
-if [wait_for_command_match "$sacct -A ${test_name}-account.1 --name=$job_name_1 -X -n -P -j $job_id_1 -S now-60 -o jobid,jobname" "$job_id_1.$job_name_1"] {
-	fail "Accounting data for job 1 ($job_id_1) not propogated to slurmdbd"
-}
-if [wait_for_command_match "$sacct -A ${test_name}-account.1 --name=$job_name_2 -X -n -P -j $job_id_2 -S now-60 -o jobid,jobname" "$job_id_2.$job_name_2"] {
-	fail "Accounting data for job 2 ($job_id_2) not propogated to slurmdbd"
-}
+wait_for_command_match -fail "$sacct -A ${test_name}-account.1 --name=$job_name_1 -X -n -P -j $job_id_1 -S now-60 -o jobid,jobname" "$job_id_1.$job_name_1"
+wait_for_command_match -fail "$sacct -A ${test_name}-account.1 --name=$job_name_2 -X -n -P -j $job_id_2 -S now-60 -o jobid,jobname" "$job_id_2.$job_name_2"

-# Only run these sub-tests if we haven't failed already
-if {[get_subtest_fail_count] == 0} {
-	#
-	# Execute sub-tests based upon these jobs
-	#
-	inc12_3_1 $job_id_1 $job_id_2 $job_name_1 $job_name_2 $test_acct
-
-	inc12_3_2 $job_id_1 $job_id_2 $job_name_1 $job_name_2 $test_acct \
-		$node1 $node2 $node3
-}
+#
+# Execute sub-tests based upon these jobs
+#
+inc12_3_1 $job_id_1 $job_id_2 $job_name_1 $job_name_2 $test_acct
+inc12_3_2 $job_id_1 $job_id_2 $job_name_1 $job_name_2 $test_acct $node1 $node2 $node3

From 106ea6de7e5c2be07a2ee5dd9023ac92c6b43aba Mon Sep 17 00:00:00 2001
From: Nathan Prisbrey
Date: Thu, 24 Aug 2023 04:09:45 +0000
Subject: [PATCH 70/81] Testsuite - Improve test1.26 removing unnecessary skip

Test was being unnecessarily skipped since commit f60c256.
The skip was introduced in commit 4217f672e3 due to some limitation
with switch/nrt, but that's no longer relevant.

Bug 17522
---
 testsuite/expect/test1.26 | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/testsuite/expect/test1.26 b/testsuite/expect/test1.26
index 758642f7cef..07e941c70d4 100755
--- a/testsuite/expect/test1.26
+++ b/testsuite/expect/test1.26
@@ -37,12 +37,7 @@ if {![is_super_user]} {
 }
 
 if {[get_config_param "FrontendName"] ne "MISSING"} {
-	skip "This test is incompatible with front-end systems"
-}
-
-set switch [get_config_param "SwitchType"]
-if {$switch ne "switch/none"} {
-	skip "This test is incompatible with $switch"
+	skip "This test is incompatible with front-end systems"
 }
 
 #
From a81984cf1cc2dd16a03bb1752f7ed1ee76dd4e08 Mon Sep 17 00:00:00 2001
From: Bjorn-Helge Mevik
Date: Sat, 9 Sep 2023 08:26:27 +0200
Subject: [PATCH 71/81] Testsuite - Fix typo in test32.4 using test_dir

Bug 17428
---
 testsuite/expect/test32.4 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/expect/test32.4 b/testsuite/expect/test32.4
index 56c26ed1b65..3a2f5320a1b 100755
--- a/testsuite/expect/test32.4
+++ b/testsuite/expect/test32.4
@@ -31,7 +31,7 @@ set hostname ""
 set file_in "$test_dir/input"
 set file_out "$test_dir/output"
 set sgather_tmp "/tmp/$test_name"
-set sgather_out "{test_name}_sgather.out"
+set sgather_out "$test_dir/sgather.out"
 
 if {[file executable $sgather] == 0} {
 	skip "$sgather does not exist"
From 3bfb3061c14454afe0b8eb3de0ba2997c182effc Mon Sep 17 00:00:00 2001
From: Bjorn-Helge Mevik
Date: Sat, 9 Sep 2023 08:46:48 +0200
Subject: [PATCH 72/81] Testsuite - Fix typo in test32.8

Bug 17428
---
 testsuite/expect/test32.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/expect/test32.8 b/testsuite/expect/test32.8
index 76f65c3ccc4..61dba8759ba 100755
--- a/testsuite/expect/test32.8
+++ b/testsuite/expect/test32.8
@@ -86,7 +86,7 @@ if {$job_id == 0} {
 }
 
 wait_for_job 
-fail $job_id "DONE" -wai_for_file -fail $file_out +wait_for_file -fail $file_out set number_1 -1 set number_2 -1 From 84ae55b277ecf2b03e09d5db99af60dc7bcbfe9c Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Sat, 9 Sep 2023 08:42:25 +0200 Subject: [PATCH 73/81] Testsuite - Improve test32.4 avoiding misleading errors Using better patterns to verify that source files are removed instead of ls errors. Bug 17428 --- testsuite/expect/test32.4 | 84 +++++++++++---------------------------- 1 file changed, 23 insertions(+), 61 deletions(-) diff --git a/testsuite/expect/test32.4 b/testsuite/expect/test32.4 index 3a2f5320a1b..86e2b6eca74 100755 --- a/testsuite/expect/test32.4 +++ b/testsuite/expect/test32.4 @@ -27,14 +27,16 @@ source ./globals set job_id 0 -set hostname "" set file_in "$test_dir/input" set file_out "$test_dir/output" set sgather_tmp "/tmp/$test_name" set sgather_out "$test_dir/sgather.out" +set nnodes 2 +set chksum 0 +set chkcnt 0 if {[file executable $sgather] == 0} { - skip "$sgather does not exist" + skip "This test needs $sgather installed" } if {[get_config_param "FrontendName"] ne "MISSING"} { @@ -46,6 +48,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} { if {[get_config_param "SlurmdUser"] ne "root(0)"} { skip "This test is incompatible with SlurmdUser != root" } +set nodes [get_nodes_by_request "-N $nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,72 +59,28 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp $sgather $sgather_tmp $sgather_out -sum $sgather ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$bin_sum $sgather ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun $bin_rm -fv $sgather_tmp exit 0 " -spawn $sbatch -N1-4 -o $file_out -t1 $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "sbatch is not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "Batch job was not submitted" -} - +set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"] wait_for_job -fail $job_id "DONE" wait_for_file -fail $file_out -set number_1 -1 -set number_2 -1 -set file_cnt 0 -set rm_cnt 0 -set node_cnt 99 -spawn $bin_cat $file_out -expect { - -re "SLURM_NNODES=($number)" { - set node_cnt $expect_out(1,string) - exp_continue - } - -re "\n($number) *($number) " { - if {$number_1 == $expect_out(1,string) && $number_2 == $expect_out(2,string)} { - incr file_cnt - } else { - set number_1 $expect_out(1,string) - set number_2 $expect_out(2,string) - } - exp_continue - } - -re "No such file or directory" { - incr rm_cnt - exp_continue - } - eof { - wait - } -} -if {$file_cnt != $node_cnt} { - fail "Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)" -} -if {$rm_cnt != $node_cnt} { - fail "Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)" +set output [run_command_output -fail "$bin_cat $file_out"] +subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes" +subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes" +subtest {[regexp "($number) 
*($number) $sgather" $output - chksum chkcnt]} "Job should print the checksum and block count of the file to be gathered"
+subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == $nnodes} "sgather should send the exact same file (from each node)"
+subtest {![regexp "removed \'${sgather_tmp}" $output]} "sgather should remove the source file (not the final srun command)"
+foreach node $nodes {
+	subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node"
+	subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node"
+}
+
From 5d59246c6d93f4d9d864a4b609e4e0c3aef2cc0a Mon Sep 17 00:00:00 2001
From: Albert Gil
Date: Sat, 9 Sep 2023 09:41:39 +0200
Subject: [PATCH 74/81] Testsuite - Improve test32.8 verifying fanout behavior

Although the previous implementation used the --fanout option, it did
not verify the option's behavior; it only verified the exact same
behavior already covered by test32.4.

The new implementation uses sacct to check the expected number of
sruns launched by sgather.

To double-check the behavior, test32.4 also adds the same check (with
different expected results).

Bug 17482
---
 testsuite/expect/test32.4 | 11 +++++
 testsuite/expect/test32.8 | 98 ++++++++++++------------------------
 2 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/testsuite/expect/test32.4 b/testsuite/expect/test32.4
index 86e2b6eca74..d55b80924c8 100755
--- a/testsuite/expect/test32.4
+++ b/testsuite/expect/test32.4
@@ -73,6 +73,7 @@ set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"]
 wait_for_job -fail $job_id "DONE"
 wait_for_file -fail $file_out
 
+# Verify file transmission
 set output [run_command_output -fail "$bin_cat $file_out"]
 subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes"
 subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes"
@@ -84,3 +85,13 @@ foreach node $nodes {
 	subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node"
 }
 
+# Verify fanout
+if {![param_contains [get_config_param "AccountingStorageType"] "*slurmdbd"]} {
+	subskip "This subtest can't be run without a usable AccountingStorageType"
+} else {
+	# Wait for the job to be completed in the DB
+	wait_for_command_match -fail "$sacct -nXP -j $job_id --format State" "COMPLETED"
+
+	set output [run_command_output -fail "$sacct -nP -j $job_id --format=JobName"]
+	subtest {[regexp -all "sgather" $output] == 1} "sgather should be called only once when the number of nodes ($nnodes) is equal to or lower than the default fanout (8)"
+}
diff --git a/testsuite/expect/test32.8 b/testsuite/expect/test32.8
index 61dba8759ba..b3a8b57399c 100755
--- a/testsuite/expect/test32.8
+++ b/testsuite/expect/test32.8
@@ -27,14 +27,16 @@
 source ./globals
 
 set job_id 0
-set hostname ""
 set file_in "$test_dir/input"
 set file_out "$test_dir/output"
 set sgather_tmp "/tmp/$test_name"
-set sgather_out "${test_name}_sgather.out"
+set sgather_out "$test_dir/sgather.out"
+set nnodes 2
+set chksum 0
+set chkcnt 0
 
 if {[file executable $sgather] == 0} {
-	skip "$sgather does not exist"
+	skip "This test needs $sgather installed"
 }
 
 if {[get_config_param "FrontendName"] ne "MISSING"} {
@@ -46,6 +48,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} {
 if {[get_config_param "SlurmdUser"] ne "root(0)"} {
 	skip "This test is incompatible with SlurmdUser != root"
 }
+set nodes [get_nodes_by_request "-N 
$nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,73 +59,39 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp -$sgather -F2 $sgather_tmp $sgather_out -sum $sgather ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp +$sgather --fanout=1 $sgather_tmp $sgather_out +$bin_sum $sgather ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun $bin_rm -fv $sgather_tmp exit 0 " -spawn $sbatch -N1-4 -o $file_out -t1 $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "sbatch is not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "Batch job was not submitted" -} - +set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"] wait_for_job -fail $job_id "DONE" wait_for_file -fail $file_out -set number_1 -1 -set number_2 -1 -set file_cnt 0 -set rm_cnt 0 -set node_cnt 99 -spawn $bin_cat $file_out -expect { - -re "SLURM_NNODES=($number)" { - set node_cnt $expect_out(1,string) - exp_continue - } - -re "\n($number) *($number) " { - if {$number_1 == $expect_out(1,string) && $number_2 == $expect_out(2,string)} { - incr file_cnt - } else { - set number_1 $expect_out(1,string) - set number_2 $expect_out(2,string) - } - exp_continue - } - -re "No such file or directory" { - log_debug "Error is expected. No worries" - incr rm_cnt - exp_continue - } - eof { - wait - } -} -if {$file_cnt != $node_cnt} { - fail "Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)" +# Verify file transmission +set output [run_command_output -fail "$bin_cat $file_out"] +subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes" +subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes" +subtest {[regexp "($number) *($number) $sgather" $output - chksum chkcnt]} "Job should should print the checksum and block count of the file to be gathered" +subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == $nnodes} "sgather should send the exact same file (from each node)" +subtest {![regexp "removed \'${sgather_tmp}" $output]} "sgather should remove the source file (not the final srun command)" +foreach node $nodes { + subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node" + subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node" } -if {$rm_cnt != $node_cnt} { - fail "Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)" + +# Verify fanout +if {![param_contains [get_config_param "AccountingStorageType"] "*slurmdbd"]} { + subskip "This subtest can't be run without a usable AccountStorageType" +} else { + # Wait for job being completed in the DB + wait_for_command_match -fail "$sacct -nXP -j $job_id --format State" "COMPLETED" + + set output [run_command_output -fail "$sacct -nP -j $job_id --format=JobName"] + subtest {[regexp -all "sgather" $output] == $nnodes} "sgather should be called once per node with fanout=1" } From e75a2d3b57cbc0ad1562d3ade2ae6018a40b55c8 Mon Sep 17 00:00:00 2001 From: Albert 
Gil Date: Sat, 9 Sep 2023 09:55:46 +0200 Subject: [PATCH 75/81] Testsuite - Improve test32.5 using better patterns This test should follow the same patterns used in tests 32.4 and 32.8. Bug 17458 --- testsuite/expect/test32.5 | 83 ++++++++++++--------------------------- 1 file changed, 25 insertions(+), 58 deletions(-) diff --git a/testsuite/expect/test32.5 b/testsuite/expect/test32.5 index 8be71bfc272..f5e09af2e4c 100755 --- a/testsuite/expect/test32.5 +++ b/testsuite/expect/test32.5 @@ -27,14 +27,16 @@ source ./globals set job_id 0 -set hostname "" set file_in "$test_dir/input" set file_out "$test_dir/output" set sgather_tmp "/tmp/$test_name" -set sgather_out "${test_name}_sgather.out" +set sgather_out "$test_dir/sgather.out" +set nnodes 2 +set chksum 0 +set chkcnt 0 if {[file executable $sgather] == 0} { - skip "$sgather does not exist" + skip "This test needs $sgather installed" } if {[get_config_param "FrontendName"] ne "MISSING"} { @@ -46,6 +48,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} { if {[get_config_param "SlurmdUser"] ne "root(0)"} { skip "This test is incompatible with SlurmdUser != root" } +set nodes [get_nodes_by_request "-N $nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,67 +59,28 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp -$sgather -k $sgather_tmp $sgather_out -sum $sgather ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp +$sgather --keep $sgather_tmp $sgather_out +$bin_sum $sgather ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun $bin_rm -fv $sgather_tmp exit 0 " -spawn $sbatch -N1-4 -o $file_out -t1 $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "sbatch is not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "Batch job was not submitted" -} - +set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"] wait_for_job -fail $job_id "DONE" wait_for_file -fail $file_out -set number_1 -1 -set number_2 -1 -set file_cnt 0 -set node_cnt 99 -spawn $bin_cat $file_out -expect { - -re "SLURM_NNODES=($number)" { - set node_cnt $expect_out(1,string) - exp_continue - } - -re "\n($number) *($number) " { - if {$number_1 == $expect_out(1,string) && $number_2 == $expect_out(2,string)} { - incr file_cnt - } else { - set number_1 $expect_out(1,string) - set number_2 $expect_out(2,string) - } - exp_continue - } - -re "No such file or directory" { - fail "Failed to keep gathered files from all allocated nodes" - } - eof { - wait - } -} -if {$file_cnt != $node_cnt} { - fail "Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)" +# Verify file transmission +set output [run_command_output -fail "$bin_cat $file_out"] +subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes" +subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes" +subtest {[regexp "($number) *($number) $sgather" $output - chksum chkcnt]} "Job should should print the checksum and block count of the file to be gathered" +subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == 
$nnodes} "sgather should send the exact same file (from each node)" +subtest {[regexp -all "removed \'${sgather_tmp}" $output] == $nnodes} "sgather should NOT remove the source file, but the final srun rm command" +foreach node $nodes { + subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node" + subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node" } From a823108715ca0beef45362fff65b98ac94e93e1c Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Sat, 9 Sep 2023 10:56:52 +0200 Subject: [PATCH 76/81] Testsuite - Improve test32.6 verifying that -C is used Enable sgather's verbose to see the scp commands being used. Bug 17428 --- testsuite/expect/test32.6 | 93 ++++++++++++--------------------------- 1 file changed, 29 insertions(+), 64 deletions(-) diff --git a/testsuite/expect/test32.6 b/testsuite/expect/test32.6 index e31a3b85b2d..cfd8c546737 100755 --- a/testsuite/expect/test32.6 +++ b/testsuite/expect/test32.6 @@ -25,16 +25,19 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ############################################################################ source ./globals +source ./globals set job_id 0 -set hostname "" set file_in "$test_dir/input" set file_out "$test_dir/output" set sgather_tmp "/tmp/$test_name" -set sgather_out "${test_name}_sgather.out" +set sgather_out "$test_dir/sgather.out" +set nnodes 2 +set chksum 0 +set chkcnt 0 if {[file executable $sgather] == 0} { - skip "$sgather does not exist" + skip "This test needs $sgather installed" } if {[get_config_param "FrontendName"] ne "MISSING"} { @@ -46,6 +49,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} { if {[get_config_param "SlurmdUser"] ne "root(0)"} { skip "This test is incompatible with SlurmdUser != root" } +set nodes [get_nodes_by_request "-N $nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,73 +60,31 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp -$sgather -C $sgather_tmp $sgather_out -sum $sgather ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp +$sgather -vC $sgather_tmp $sgather_out +$bin_sum $sgather ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun $bin_rm -fv $sgather_tmp exit 0 " -spawn $sbatch -N1-4 -o $file_out -t1 $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "sbatch is not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "Batch job was not submitted" -} - +set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"] wait_for_job -fail $job_id "DONE" wait_for_file -fail $file_out -set number_1 -1 -set number_2 -1 -set file_cnt 0 -set rm_cnt 0 -set node_cnt 99 -spawn $bin_cat $file_out -expect { - -re "SLURM_NNODES=($number)" { - set node_cnt $expect_out(1,string) - exp_continue - } - -re "\n($number) *($number) " { - if {$number_1 == $expect_out(1,string) && $number_2 == $expect_out(2,string)} { - incr file_cnt - } else { - set number_1 $expect_out(1,string) - set number_2 $expect_out(2,string) - } - exp_continue - } - -re "No such file or directory" { - log_debug "Error is expected. 
No worries" - incr rm_cnt - exp_continue - } - eof { - wait - } -} -if {$file_cnt != $node_cnt} { - fail "Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)" -} -if {$rm_cnt == 0} { - fail "Failed to remove gathered files from all allocated nodes ($rm_cnt == 0)" +# Verify file transmission +set output [run_command_output -fail "$bin_cat $file_out"] +subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes" +subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes" +subtest {[regexp "($number) *($number) $sgather" $output - chksum chkcnt]} "Job should should print the checksum and block count of the file to be gathered" +subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == $nnodes} "sgather should send the exact same file (from each node)" +subtest {![regexp "removed \'${sgather_tmp}" $output]} "sgather should remove the source file (not the final srun command)" +foreach node $nodes { + subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node" + subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node" } + +# Verify compress +subtest {[regexp -all "executing \"scp -C" $output] == $nnodes} "sgather should use the -C option when launching scp in all nodes" From c9e36a50151c3fcb2d7d3c0151455b86d9fab5a0 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Sat, 9 Sep 2023 11:27:37 +0200 Subject: [PATCH 77/81] Testsuite - Improve test32.9 avoiding mismatching locale time formats Bug 17428 --- testsuite/expect/test32.9 | 88 +++++++++++++++------------------------ 1 file changed, 33 insertions(+), 55 deletions(-) diff --git a/testsuite/expect/test32.9 b/testsuite/expect/test32.9 index e837d4f8534..ea3ceb1772c 100755 --- a/testsuite/expect/test32.9 +++ b/testsuite/expect/test32.9 @@ -27,14 +27,17 @@ source ./globals set job_id 0 -set hostname "" set file_in "$test_dir/input" set file_out "$test_dir/output" set sgather_tmp "/tmp/$test_name" -set sgather_out "${test_name}_sgather.out" +set sgather_out "$test_dir/sgather.out" +set nnodes 2 +set chksum 0 +set chkcnt 0 +set stamp 01020304 if {[file executable $sgather] == 0} { - skip "$sgather does not exist" + skip "This test needs $sgather installed" } if {[get_config_param "FrontendName"] ne "MISSING"} { @@ -46,6 +49,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} { if {[get_config_param "SlurmdUser"] ne "root(0)"} { skip "This test is incompatible with SlurmdUser != root" } +set nodes [get_nodes_by_request "-N $nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,64 +60,35 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp -$srun touch -t01020304 $sgather_tmp -$srun ls -l $sgather_tmp -$sgather -p $sgather_tmp $sgather_out -ls -l ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp +$srun touch -t $stamp $sgather_tmp +$srun ls -l --time-style=+%m%d%H%M $sgather_tmp +$sgather --preserve $sgather_tmp $sgather_out +$bin_sum $sgather ${sgather_out}* +ls -l --time-style=+%m%d%H%M ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun 
$bin_rm -fv $sgather_tmp
 exit 0
 "
 
-spawn $sbatch -N1-2 -o $file_out -t2 $file_in
-expect {
-	-re "Submitted batch job ($number)" {
-		set job_id $expect_out(1,string)
-		exp_continue
-	}
-	timeout {
-		fail "sbatch is not responding"
-	}
-	eof {
-		wait
-	}
-}
-if {$job_id == 0} {
-	fail "Batch job was not submitted"
-}
-
+set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"]
 wait_for_job -fail $job_id "DONE"
 wait_for_file -fail $file_out
 
-set matches 0
-set node_cnt 99
-spawn $bin_cat $file_out
-expect {
-	-re "SLURM_NNODES=($number)" {
-		set node_cnt $expect_out(1,string)
-		exp_continue
-	}
-	-re " Jan +2 " {
-		incr matches
-		exp_continue
-	}
-	-re "No such file or directory" {
-		log_debug "Error is expected. No worries"
-		incr rm_cnt
-		exp_continue
-	}
-	eof {
-		wait
-	}
-}
-if {$matches != [expr $node_cnt * 2]} {
-	fail "Failed to preserve file time ($matches != [expr $node_cnt * 2])"
+# Verify file transmission
+set output [run_command_output -fail "$bin_cat $file_out"]
+subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes"
+subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes"
+subtest {[regexp "($number) *($number) $sgather" $output - chksum chkcnt]} "Job should print the checksum and block count of the file to be gathered"
+subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == $nnodes} "sgather should send the exact same file (from each node)"
+subtest {![regexp "removed \'${sgather_tmp}" $output]} "sgather should remove the source file (not the final srun command)"
+foreach node $nodes {
+	subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node"
+	subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node"
 }
+
+# Verify --preserve
+subtest {[regexp -all "$stamp $sgather_tmp" $output] == $nnodes} "Initial touch should set the right time stamp ($stamp) in all nodes"
+subtest {[regexp -all "$stamp $sgather_out" $output] == $nnodes} "sgather should preserve the time stamp ($stamp) from all nodes"

From 649f09d41de6e712c834f7c10e1ba6104788f3ee Mon Sep 17 00:00:00 2001
From: Albert Gil
Date: Sat, 9 Sep 2023 12:05:43 +0200
Subject: [PATCH 78/81] Testsuite - Improve test32.12 verifying that ConnectTimeout is used

Enable sgather's verbose to see the scp commands being used.

Bug 17428 --- testsuite/expect/test32.12 | 94 ++++++++++++-------------------------- 1 file changed, 30 insertions(+), 64 deletions(-) diff --git a/testsuite/expect/test32.12 b/testsuite/expect/test32.12 index bffa94c1ee8..562a977bafe 100755 --- a/testsuite/expect/test32.12 +++ b/testsuite/expect/test32.12 @@ -27,14 +27,17 @@ source ./globals set job_id 0 -set hostname "" set file_in "$test_dir/input" set file_out "$test_dir/output" set sgather_tmp "/tmp/$test_name" -set sgather_out "${test_name}_sgather.out" +set sgather_out "$test_dir/sgather.out" +set nnodes 2 +set chksum 0 +set chkcnt 0 +set sgather_time 20 if {[file executable $sgather] == 0} { - skip "$sgather does not exist" + skip "This test needs $sgather installed" } if {[get_config_param "FrontendName"] ne "MISSING"} { @@ -46,6 +49,10 @@ if {[get_config_param "MULTIPLE_SLURMD"] eq "Yes"} { if {[get_config_param "SlurmdUser"] ne "root(0)"} { skip "This test is incompatible with SlurmdUser != root" } +set nodes [get_nodes_by_request "-N $nnodes -t1"] +if {[llength $nodes] != $nnodes} { + skip "This test requires $nnodes nodes in the default partition" +} proc cleanup {} { global job_id @@ -53,73 +60,32 @@ proc cleanup {} { cancel_job $job_id } -# Set env PATH to slurm dir -set env(PATH) $slurm_dir/bin:$env(PATH) - +# $sgather -v --timeout=$sgather_time $sgather_tmp $sgather_out make_bash_script $file_in " env | grep SLURM_NNODES -$bin_rm -f ${sgather_out}\* -$srun $bin_cp -f $sgather $sgather_tmp -$sgather -t20 $sgather_tmp $sgather_out -sum $sgather ${sgather_out}\* -$bin_rm -f ${sgather_out}\* -$srun ls -l $sgather_tmp -$srun $bin_rm -f $sgather_tmp +$srun $bin_cp -fv $sgather $sgather_tmp +$sgather -v -t $sgather_time $sgather_tmp $sgather_out +$bin_sum $sgather ${sgather_out}* +$bin_rm -fv ${sgather_out}* +$srun $bin_rm -fv $sgather_tmp exit 0 " -spawn $sbatch -N1-4 -o $file_out -t1 $file_in -expect { - -re "Submitted batch job ($number)" { - set job_id $expect_out(1,string) - exp_continue - } - timeout { - fail "sbatch is not responding" - } - eof { - wait - } -} -if {$job_id == 0} { - fail "Batch job was not submitted" -} - +set job_id [submit_job -fail "-N $nnodes -o $file_out -t1 $file_in"] wait_for_job -fail $job_id "DONE" wait_for_file -fail $file_out -set number_1 -1 -set number_2 -1 -set file_cnt 0 -set rm_cnt 0 -set node_cnt 99 -spawn $bin_cat $file_out -expect { - -re "SLURM_NNODES=($number)" { - set node_cnt $expect_out(1,string) - exp_continue - } - -re "\n($number) *($number) " { - if {$number_1 == $expect_out(1,string) && $number_2 == $expect_out(2,string)} { - incr file_cnt - } else { - set number_1 $expect_out(1,string) - set number_2 $expect_out(2,string) - } - exp_continue - } - -re "No such file or directory" { - log_debug "Error is expected. 
No worries" - incr rm_cnt - exp_continue - } - eof { - wait - } -} -if {$file_cnt != $node_cnt} { - fail "Failed to gather files from all allocated nodes ($file_cnt != $node_cnt)" -} -if {$rm_cnt != $node_cnt} { - fail "Failed to remove gathered files from all allocated nodes ($rm_cnt != $node_cnt)" +# Verify file transmission +set output [run_command_output -fail "$bin_cat $file_out"] +subtest {[regexp "SLURM_NNODES=$nnodes" $output]} "Job should use $nnodes nodes" +subtest {[regexp -all "$sgather\' -> \'$sgather_tmp" $output] == $nnodes} "Initial srun should copy the file to gather in all $nnodes nodes" +subtest {[regexp "($number) *($number) $sgather" $output - chksum chkcnt]} "Job should should print the checksum and block count of the file to be gathered" +subtest {[regexp -all "$chksum *$chkcnt ${sgather_out}" $output] == $nnodes} "sgather should send the exact same file (from each node)" +subtest {![regexp "removed \'${sgather_tmp}" $output]} "sgather should remove the source file (not the final srun command)" +foreach node $nodes { + subtest {[regexp "$chksum *$chkcnt ${sgather_out}.${node}" $output]} "sgather should send the file from $node" + subtest {[regexp "removed \'${sgather_out}.${node}" $output]} "Job srun should remove sent file from $node" } + +# Verify --timeout +subtest {[regexp -all "executing \"scp -o ConnectTimeout=$sgather_time" $output] == $nnodes} "sgather should use the right -o ConnectTimeout option when launching scp in all nodes" From 48ff517e2f61ef3ff172aa15cfde16b7fe0baf68 Mon Sep 17 00:00:00 2001 From: Albert Gil Date: Tue, 12 Sep 2023 13:40:40 +0200 Subject: [PATCH 79/81] Testsuite - Improve test1.74 waiting for updates in slurmctld Bug 15140 --- testsuite/expect/test1.74 | 57 ++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/testsuite/expect/test1.74 b/testsuite/expect/test1.74 index 893c89ad2e2..05b9d8b214d 100755 --- a/testsuite/expect/test1.74 +++ b/testsuite/expect/test1.74 @@ -32,7 +32,7 @@ source ./globals_accounting set node_cnt 0 set cluster [get_config_param "ClusterName"] set node_name "" -set user "" +set user [get_my_user_name] set acct "${test_name}_acct" set acct_c1 "${test_name}_acct_c_1" set acct_c2 "${test_name}_acct_c_2" @@ -55,8 +55,6 @@ if {[get_admin_level] ne "Administrator"} { skip "This test can't be run without being an Accounting administrator" } -set user [get_my_user_name] - proc cleanup { } { global acct acct_c1 acct_c2 qos @@ -176,8 +174,13 @@ srun_test $node_cnt $acct set mod_qos_vals(MaxNodes) [expr $node_cnt - 1] mod_qos $qos [array get mod_qos_vals] set mod_qos_vals(MaxNodes) -1 -# some times the message takes a little time for some reason -sleep 1 + +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "MaxTRESPJ=node=[expr $node_cnt - 1]" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} + srun_test [expr $node_cnt-1] $acct # now make sure the maxnodes of the QOS overrides the association @@ -185,6 +188,13 @@ set mod_acct_assoc_vals(maxnodes) 1 if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} { fail "Account was not modified" } + +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "MaxTRESPJ=node=1" $output] && [regexp "MaxTRESPJ=node=[expr $node_cnt - 1]" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} + srun_test [expr 
$node_cnt-1] $acct # Reset acct maxnodes @@ -196,7 +206,12 @@ if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [ar # Now run test using GrpNode limits of qos set mod_qos_vals(GrpNodes) [expr $node_cnt - 1] mod_qos $qos [array get mod_qos_vals] -sleep 1 + +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=[expr $node_cnt - 1]" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} srun_test [expr $node_cnt-1] $acct @@ -205,6 +220,13 @@ set mod_acct_assoc_vals(grpnodes) 1 if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} { fail "Account was not modified" } + +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=[expr $node_cnt - 1]" $output] && [regexp "GrpTRES=.*node=1" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} + srun_test [expr $node_cnt-1] $acct # Now make sure maxnodes is the max of the association and grpnodes of the @@ -215,6 +237,13 @@ set mod_acct_assoc_vals(maxnodes) 1 if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} { fail "Account was not modified" } + +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=N" $output] && [regexp "MaxTRESPJ=node=1" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} + srun_test 1 $acct set mod_acct_assoc_vals(maxnodes) -1 @@ -235,6 +264,12 @@ if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [a fail "Account was not modified" } +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=N" $output] && ![regexp "MaxTRESPJ=node" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"] +} + # Run srun test on parent and child accounts srun_test $node_cnt $acct srun_test [expr $node_cnt - 1] $acct_c1 @@ -255,6 +290,16 @@ set mod_acct_assoc_vals(MaxNode) -1 set mod_acct_assoc_vals(GrpNode) [expr $node_cnt - 2] mod_acct $acct_c2 [array get mod_acct_desc] [array get mod_acct_vals] [array get mod_acct_assoc_vals] +# Wait until slurmcltd is updated +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=[expr $node_cnt - 1]" $output] && ![regexp "MaxTRESPJ=node" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct_c1 user=$user qos=$qos"] +} +set output "" +wait_for -fail {[regexp "GrpTRES=.*node=[expr $node_cnt - 2]" $output] && ![regexp "MaxTRESPJ=node" $output]} { + set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct_c2 user=$user qos=$qos"] +} + # Run srun test on parent and child accounts srun_test $node_cnt $acct srun_test [expr $node_cnt - 1] $acct_c1 From 678a364d8763888b878ab8a1cdc9c49d3e3ab229 Mon Sep 17 00:00:00 2001 From: Tim McMullan Date: Thu, 14 Sep 2023 12:56:02 -0600 Subject: [PATCH 80/81] Run autoreconf. --- aclocal.m4 | 90 ++++++++++++++++++++++++++++++++++++------ configure | 114 ++++++++++++++++++++++++++--------------------------- 2 files changed, 136 insertions(+), 68 deletions(-) diff --git a/aclocal.m4 b/aclocal.m4 index b3be7044a40..5b82ceae6ae 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -426,7 +426,7 @@ main () rm -f conf.gtktest ]) -# pkg.m4 - Macros to locate and utilise pkg-config. 
-*- Autoconf -*- +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- # serial 12 (pkg-config-0.29.2) dnl Copyright © 2004 Scott James Remnant . @@ -514,7 +514,7 @@ dnl Check to see whether a particular set of modules exists. Similar to dnl PKG_CHECK_MODULES(), but does not set variables or print errors. dnl dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -dnl only at the first occurence in configure.ac, so if the first place +dnl only at the first occurrence in configure.ac, so if the first place dnl it's called might be skipped (such as if it is within an "if", you dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], @@ -583,14 +583,14 @@ if test $pkg_failed = yes; then AC_MSG_RESULT([no]) _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` else - $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - m4_default([$4], [AC_MSG_ERROR( + m4_default([$4], [AC_MSG_ERROR( [Package requirements ($2) were not met: $$1_PKG_ERRORS @@ -602,7 +602,7 @@ _PKG_TEXT])[]dnl ]) elif test $pkg_failed = untried; then AC_MSG_RESULT([no]) - m4_default([$4], [AC_MSG_FAILURE( + m4_default([$4], [AC_MSG_FAILURE( [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. @@ -612,10 +612,10 @@ _PKG_TEXT To get pkg-config, see .])[]dnl ]) else - $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS - $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS AC_MSG_RESULT([yes]) - $3 + $3 fi[]dnl ])dnl PKG_CHECK_MODULES @@ -702,6 +702,74 @@ AS_VAR_COPY([$1], [pkg_cv_][$1]) AS_VAR_IF([$1], [""], [$5], [$4])dnl ])dnl PKG_CHECK_VAR +dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------ +dnl +dnl Prepare a "--with-" configure option using the lowercase +dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and +dnl PKG_CHECK_MODULES in a single macro. 
+AC_DEFUN([PKG_WITH_MODULES], +[ +m4_pushdef([with_arg], m4_tolower([$1])) + +m4_pushdef([description], + [m4_default([$5], [build with ]with_arg[ support])]) + +m4_pushdef([def_arg], [m4_default([$6], [auto])]) +m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) +m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) + +m4_case(def_arg, + [yes],[m4_pushdef([with_without], [--without-]with_arg)], + [m4_pushdef([with_without],[--with-]with_arg)]) + +AC_ARG_WITH(with_arg, + AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, + [AS_TR_SH([with_]with_arg)=def_arg]) + +AS_CASE([$AS_TR_SH([with_]with_arg)], + [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], + [auto],[PKG_CHECK_MODULES([$1],[$2], + [m4_n([def_action_if_found]) $3], + [m4_n([def_action_if_not_found]) $4])]) + +m4_popdef([with_arg]) +m4_popdef([description]) +m4_popdef([def_arg]) + +])dnl PKG_WITH_MODULES + +dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ----------------------------------------------- +dnl +dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES +dnl check._[VARIABLE-PREFIX] is exported as make variable. +AC_DEFUN([PKG_HAVE_WITH_MODULES], +[ +PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) + +AM_CONDITIONAL([HAVE_][$1], + [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) +])dnl PKG_HAVE_WITH_MODULES + +dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------------------ +dnl +dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after +dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make +dnl and preprocessor variable. +AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], +[ +PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) + +AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], + [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) +])dnl PKG_HAVE_DEFINE_WITH_MODULES + # Copyright (C) 2002-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation diff --git a/configure b/configure index df63023e752..15bcb684a4b 100755 --- a/configure +++ b/configure @@ -7153,11 +7153,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 printf %s "checking for $CXX option to enable C++11 features... " >&6; } -if test ${ac_cv_prog_cxx_11+y} +if test ${ac_cv_prog_cxx_cxx11+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_11=no + ac_cv_prog_cxx_cxx11=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -7199,11 +7199,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 printf %s "checking for $CXX option to enable C++98 features... " >&6; } -if test ${ac_cv_prog_cxx_98+y} +if test ${ac_cv_prog_cxx_cxx98+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_98=no + ac_cv_prog_cxx_cxx98=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -24468,21 +24468,21 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - libselinux_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libselinux" 2>&1` + libselinux_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libselinux" 2>&1` else - libselinux_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libselinux" 2>&1` + libselinux_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libselinux" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$libselinux_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$libselinux_PKG_ERRORS" >&5 - as_fn_error $? "cannot locate libselinux" "$LINENO" 5 + as_fn_error $? "cannot locate libselinux" "$LINENO" 5 elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - as_fn_error $? "cannot locate libselinux" "$LINENO" 5 + as_fn_error $? "cannot locate libselinux" "$LINENO" 5 else - libselinux_CFLAGS=$pkg_cv_libselinux_CFLAGS - libselinux_LIBS=$pkg_cv_libselinux_LIBS + libselinux_CFLAGS=$pkg_cv_libselinux_CFLAGS + libselinux_LIBS=$pkg_cv_libselinux_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -24658,24 +24658,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - dbus_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "dbus-1" 2>&1` + dbus_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "dbus-1" 2>&1` else - dbus_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "dbus-1" 2>&1` + dbus_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "dbus-1" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$dbus_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$dbus_PKG_ERRORS" >&5 - x_ac_have_dbus="no" + x_ac_have_dbus="no" elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - x_ac_have_dbus="no" + x_ac_have_dbus="no" else - dbus_CFLAGS=$pkg_cv_dbus_CFLAGS - dbus_LIBS=$pkg_cv_dbus_LIBS + dbus_CFLAGS=$pkg_cv_dbus_CFLAGS + dbus_LIBS=$pkg_cv_dbus_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - x_ac_have_dbus="yes" + x_ac_have_dbus="yes" fi if test "x$x_ac_have_dbus" = "xyes"; then WITH_DBUS_TRUE= @@ -24747,24 +24747,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - CHECK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "check >= 0.9.8" 2>&1` + CHECK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "check >= 0.9.8" 2>&1` else - CHECK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "check >= 0.9.8" 2>&1` + CHECK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "check >= 0.9.8" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$CHECK_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$CHECK_PKG_ERRORS" >&5 - ac_have_check="no" + ac_have_check="no" elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - ac_have_check="no" + ac_have_check="no" else - CHECK_CFLAGS=$pkg_cv_CHECK_CFLAGS - CHECK_LIBS=$pkg_cv_CHECK_LIBS + CHECK_CFLAGS=$pkg_cv_CHECK_CFLAGS + CHECK_LIBS=$pkg_cv_CHECK_LIBS { 
printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - ac_have_check="yes" + ac_have_check="yes" fi if test "x$ac_have_check" = "xyes"; then HAVE_CHECK_TRUE= @@ -25013,24 +25013,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - GLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$pkg_config_args" 2>&1` + GLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$pkg_config_args" 2>&1` else - GLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$pkg_config_args" 2>&1` + GLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$pkg_config_args" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$GLIB_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$GLIB_PKG_ERRORS" >&5 - : + : elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - : + : else - GLIB_CFLAGS=$pkg_cv_GLIB_CFLAGS - GLIB_LIBS=$pkg_cv_GLIB_LIBS + GLIB_CFLAGS=$pkg_cv_GLIB_CFLAGS + GLIB_LIBS=$pkg_cv_GLIB_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - : + : fi @@ -26042,24 +26042,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - HPE_SLINGSHOT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libcxi" 2>&1` + HPE_SLINGSHOT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libcxi" 2>&1` else - HPE_SLINGSHOT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libcxi" 2>&1` + HPE_SLINGSHOT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libcxi" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$HPE_SLINGSHOT_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$HPE_SLINGSHOT_PKG_ERRORS" >&5 - x_ac_have_libcxi="no" + x_ac_have_libcxi="no" elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - x_ac_have_libcxi="no" + x_ac_have_libcxi="no" else - HPE_SLINGSHOT_CFLAGS=$pkg_cv_HPE_SLINGSHOT_CFLAGS - HPE_SLINGSHOT_LIBS=$pkg_cv_HPE_SLINGSHOT_LIBS + HPE_SLINGSHOT_CFLAGS=$pkg_cv_HPE_SLINGSHOT_CFLAGS + HPE_SLINGSHOT_LIBS=$pkg_cv_HPE_SLINGSHOT_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - x_ac_have_libcxi="yes" + x_ac_have_libcxi="yes" fi if test x$x_ac_have_libcxi = xyes; then WITH_SWITCH_HPE_SLINGSHOT_TRUE= @@ -26804,24 +26804,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - lua_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "${x_ac_lua_pkg_name}" 2>&1` + lua_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "${x_ac_lua_pkg_name}" 2>&1` else - lua_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "${x_ac_lua_pkg_name}" 2>&1` + lua_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "${x_ac_lua_pkg_name}" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$lua_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$lua_PKG_ERRORS" >&5 - x_ac_have_lua="no" + x_ac_have_lua="no" elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - x_ac_have_lua="no" + x_ac_have_lua="no" else - lua_CFLAGS=$pkg_cv_lua_CFLAGS - 
lua_LIBS=$pkg_cv_lua_LIBS
+  lua_CFLAGS=$pkg_cv_lua_CFLAGS
+  lua_LIBS=$pkg_cv_lua_LIBS
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
-	x_ac_have_lua="yes"
+  x_ac_have_lua="yes"
 fi
 if test "x$x_ac_have_lua" = "xyes"; then

From ae118938c925ee42c59f2f70399a366ed0903422 Mon Sep 17 00:00:00 2001
From: Nathan Prisbrey
Date: Mon, 18 Sep 2023 11:38:56 +0200
Subject: [PATCH 81/81] Testsuite - Remove reference to nonexistent test1.39

Test test1.39 was removed in commit 8c17aa6996.
---
 testsuite/README | 1 -
 1 file changed, 1 deletion(-)

diff --git a/testsuite/README b/testsuite/README
index 2a7106dc38f..70188c83c3c 100644
--- a/testsuite/README
+++ b/testsuite/README
@@ -110,7 +110,6 @@ test1.36  Test parallel launch of srun (e.g. "srun srun hostname")
 test1.37  Test of srun --ntasks-per-node option.
 test1.38  Test srun handling of SIGINT to get task status or kill the job
           (--quit-on-interrupt option).
-test1.39  Test of linux light-weight core files.
 test1.40  Test of job account (--account option).
 test1.41  Validate Slurm debugger infrastructure (--debugger-test option).
 test1.43  Test of slurm_job_will_run API, (srun --test-only option).
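
A note on the polling idiom introduced by the testsuite changes in this
series: instead of a fixed "sleep 1" (or no wait at all) between a
qos/association modification and the srun test that depends on it, the
tests now poll "scontrol show assoc_mgr" until slurmctld actually reports
the updated limit. Below is a minimal illustrative sketch of that idiom.
It reuses wait_for and run_command_output as they appear in the hunks
above, but the node=2 limit and the $acct/$user/$qos variables here are
placeholders for whatever the surrounding test defines, not values taken
from any real test in this series.

	# Poll until slurmctld reports the new GrpTRES node limit; wait_for
	# keeps running the body and checking the condition until the
	# condition holds, and -fail aborts the test if it never does
	# within the timeout. node=2 is a placeholder limit value.
	set output ""
	wait_for -fail {[regexp "GrpTRES=.*node=2" $output]} {
		set output [run_command_output -fail "$scontrol show assoc_mgr account=$acct user=$user qos=$qos"]
	}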