|
Line 0
Link Here
|
|
|
1 |
#!/usr/bin/expect |
| 2 |
############################################################################ |
| 3 |
# Purpose: Test of SLURM functionality |
| 4 |
# Test that the job dies when one of the pass-over nodes is |
| 5 |
# set to an error state. |
| 6 |
# |
| 7 |
# |
| 8 |
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR |
| 9 |
# "FAILURE: ..." otherwise with an explanation of the failure, OR |
| 10 |
# anything else indicates a failure mode that must be investigated. |
| 11 |
############################################################################ |
| 12 |
# Copyright (C) 2011 SchedMD LLC |
| 13 |
# Written by Nathan Yee <nyee32@schedmd.com> |
| 14 |
# |
| 15 |
# This file is part of SLURM, a resource management program. |
| 16 |
# For details, see <http://www.schedmd.com/slurmdocs/>. |
| 17 |
# Please also read the included file: DISCLAIMER. |
| 18 |
# |
| 19 |
# SLURM is free software; you can redistribute it and/or modify it under |
| 20 |
# the terms of the GNU General Public License as published by the Free |
| 21 |
# Software Foundation; either version 2 of the License, or (at your option) |
| 22 |
# any later version. |
| 23 |
# |
| 24 |
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| 25 |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 26 |
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| 27 |
# details. |
| 28 |
# |
| 29 |
# You should have received a copy of the GNU General Public License along |
| 30 |
# with SLURM; if not, write to the Free Software Foundation, Inc., |
| 31 |
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| 32 |
############################################################################ |
| 33 |
source ./globals |
| 34 |
|
| 35 |
# ---------------------------------------------------------------------------
# Test bookkeeping
# ---------------------------------------------------------------------------
set test_id     "8.9"
set exit_code   0
set file_in     "test$test_id.input"
set job_id      0

# Geometry requested from sbatch (e.g. "2x1x1x1"); chosen once the machine
# dimensions are known.
set geom        ""

# Coordinates parsed from the last NodeName reported by "scontrol show node",
# one digit per dimension (A, X, Y, Z).
set a_size      ""
set x_size      ""
set y_size      ""
set z_size      ""

# Trailing coordinates of the job's MidplaneList, one digit per dimension.
set job_fini1   ""
set job_fini2   ""
set job_fini3   ""
set job_fini4   ""

# Exactly one of these is set to 1 to record which dimension the job spans.
set a_flag      0
set x_flag      0
set y_flag      0
set z_flag      0

set tmp         0
set block       0

print_header $test_id

# The test only makes sense on a Bluegene/Q system: bail out (without
# failing) on anything else. Because this guard already requires the type
# to be exactly "Q", no further validation of the type is needed.
if {([test_bluegene] == 0) || [string compare [get_bluegene_type] "Q"]} {
	send_user "\nWARNING: This test is only compatible with Bluegene/Q systems\n"
	exit $exit_code
}
| 68 |
|
| 69 |
# ---------------------------------------------------------------------------
# Discover the machine dimensions.
#
# Every NodeName line overwrites the previous values, so once scontrol
# reaches eof the *_size variables hold the four coordinate digits of the
# last node listed.
# NOTE(review): this presumes the last node listed carries the largest
# coordinate in each dimension -- confirm against scontrol's ordering.
# ---------------------------------------------------------------------------
spawn $scontrol show node
expect {
	-re "NodeName=($alpha_numeric).($digit)($digit)($digit)($digit)" {
		set a_size $expect_out(2,string)
		set x_size $expect_out(3,string)
		set y_size $expect_out(4,string)
		set z_size $expect_out(5,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		exit 1
	}
	eof {
		wait
	}
}

# Nothing matched at all: report that explicitly instead of letting the
# empty strings fall through the numeric comparisons below.
if {[string length $a_size] == 0} {
	send_user "\nFAILURE: unable to determine the system dimensions\n"
	exit 1
}

# Pick the first dimension whose last coordinate is greater than 2 and ask
# for a job two midplanes long in that dimension. The matching flag records
# which dimension was chosen.
if {$a_size > 2} {
	set geom "2x1x1x1"
	set a_flag 1
} elseif {$x_size > 2} {
	set geom "1x2x1x1"
	set x_flag 1
} elseif {$y_size > 2} {
	set geom "1x1x2x1"
	set y_flag 1
} elseif {$z_size > 2} {
	set geom "1x1x1x2"
	set z_flag 1
} else {
	send_user "\nFAILURE: not enough dimensions\n"
	exit 1
}
| 103 |
|
| 104 |
# ---------------------------------------------------------------------------
# Submit a long-running batch job with the selected geometry and a torus
# connection in every dimension, then wait for it to start.
# ---------------------------------------------------------------------------
make_bash_script $file_in "$bin_sleep 500"

set submit_error 0
spawn $sbatch --geometry=$geom --conn-type=T,T,T,T --output=/dev/null --error=/dev/null $file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	-re "error" {
		# Remember the error and keep draining output; the decision to
		# abort is taken after sbatch has finished so that a job which
		# was submitted anyway can still be cancelled.
		send_user "\nFAILURE: Job was not submitted\n"
		set submit_error 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		exit 1
	}
	eof {
		wait
	}
}

# Abort if sbatch reported an error or never printed a job id.
if {$submit_error != 0 || $job_id == 0} {
	if {$submit_error == 0} {
		send_user "\nFAILURE: Job was not submitted\n"
	}
	if {$job_id != 0} {
		cancel_job $job_id
	}
	exit 1
}

# The rest of the test needs the job to be running on its block.
if {[wait_for_job $job_id "RUNNING"] != 0} {
	send_user "\nFAILURE: job $job_id did not start running\n"
	cancel_job $job_id
	exit 1
}
| 127 |
|
| 128 |
# ---------------------------------------------------------------------------
# Read the job's midplane list and block id.
#
# From "MidplaneList=<prefix>?<start>x<d1><d2><d3><d4>" the four digits after
# the "x" are kept in job_fini1..4; Block_ID is kept in block.
# ---------------------------------------------------------------------------
spawn $scontrol show job $job_id
expect {
	-re "MidplaneList=($alpha_numeric).($alpha_numeric)x($digit)($digit)($digit)($digit)" {
		set job_fini1 $expect_out(3,string)
		set job_fini2 $expect_out(4,string)
		set job_fini3 $expect_out(5,string)
		set job_fini4 $expect_out(6,string)
		exp_continue
	}
	-re "Block_ID=($alpha_numeric)" {
		set block $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		exit 1
	}
	eof {
		wait
	}
}

# Without the midplane coordinates the node name built below would be
# meaningless, and a wrong node would be put into an error state.
if {[string length $job_fini1] == 0} {
	send_user "\nFAILURE: unable to read the MidplaneList of job $job_id\n"
	cancel_job $job_id
	exit 1
}

# Build the name of the node to disturb: take the job's trailing coordinates
# and replace the coordinate of the dimension the job spans with the system's
# last coordinate in that dimension.
# NOTE(review): the "bgq" node-name prefix is hard-coded; confirm it matches
# the node names of the system under test.
if {$a_flag == 1} {
	set last_node "bgq$a_size$job_fini2$job_fini3$job_fini4"
} elseif {$x_flag == 1} {
	set last_node "bgq$job_fini1$x_size$job_fini3$job_fini4"
} elseif {$y_flag == 1} {
	set last_node "bgq$job_fini1$job_fini2$y_size$job_fini4"
} elseif {$z_flag == 1} {
	set last_node "bgq$job_fini1$job_fini2$job_fini3$z_size"
}
| 161 |
|
| 162 |
# ---------------------------------------------------------------------------
# Put the pass-over node into an error state and verify that the job fails.
# Every failure path below returns the node to "free" before exiting so the
# system is not left with a node in error.
# ---------------------------------------------------------------------------
change_subbp_state $last_node "0" "error"

if {[wait_for_job $job_id "DONE"] != 0} {
	send_user "\nFAILURE: job $job_id did not complete\n"
	cancel_job $job_id
	change_subbp_state $last_node "0" "free"
	exit 1
}

# Look for our job id, alone on a line, among the failed jobs. Matching the
# exact id (rather than remembering the last token printed) keeps the check
# correct when other failed jobs are listed as well.
set job_failed 0
spawn $squeue --state=failed -o%i
expect {
	-re "(^|\[\r\n\])$job_id\[\r\n\]" {
		set job_failed 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE:squeue is not responding\n"
		change_subbp_state $last_node "0" "free"
		exit 1
	}
	eof {
		wait
	}
}

if {$job_failed == 0} {
	send_user "\nFAILURE: Job $job_id did not fail\n"
	change_subbp_state $last_node "0" "free"
	exit 1
}
| 185 |
|
| 186 |
# ---------------------------------------------------------------------------
# Report on the job's block, restore the node, and print the test result.
# ---------------------------------------------------------------------------
send_user "\nblock=$block\n"

spawn $scontrol show block $block
expect {
	-re "not found" {
		send_user "\nBlock was not found, this is expected.\n"
		exp_continue
	}
	timeout {
		# Record the failure but keep going: the node must still be
		# returned to "free" below.
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Return the node that was put into an error state to "free".
change_subbp_state $last_node "0" "free"

if {$exit_code == 0} {
	# Remove the generated batch script; it is kept on failure to help
	# with debugging.
	file delete -force $file_in
	send_user "\nSUCCESS\n"
} else {
	send_user "\nFAILURE\n"
}

exit $exit_code