View | Details | Raw Unified | Return to ticket 434 | Differences between
and this patch

Collapse All | Expand All

(-)a/testsuite/expect/Makefile.am (+1 lines)
Lines 444-449 EXTRA_DIST = \ Link Here
444
	test28.3                        \
444
	test28.3                        \
445
	test28.4                        \
445
	test28.4                        \
446
	test28.5                        \
446
	test28.5                        \
447
	test28.6			\
447
	test30.1			\
448
	test30.1			\
448
	test31.1			\
449
	test31.1			\
449
	usleep
450
	usleep
(-)a/testsuite/expect/Makefile.in (+1 lines)
Lines 783-788 EXTRA_DIST = \ Link Here
783
	test28.3                        \
783
	test28.3                        \
784
	test28.4                        \
784
	test28.4                        \
785
	test28.5                        \
785
	test28.5                        \
786
	test28.6			\
786
	test30.1			\
787
	test30.1			\
787
	test31.1			\
788
	test31.1			\
788
	usleep
789
	usleep
(-)a/testsuite/expect/README (+2 lines)
Lines 649-654 test28.4 Validates scontrol update command for a job array with Link Here
649
	   a job array index and the whole job array.
649
	   a job array index and the whole job array.
650
test28.5   Validates that scontrol can hold and release a whole job
650
test28.5   Validates that scontrol can hold and release a whole job
651
	   array or an individual jobid index.
651
	   array or an individual jobid index.
652
test28.6   Validates that when a job array is submitted to multiple
653
	   partitions, the jobs run on all of them.
652
654
653
test29.#   Reserved.
655
test29.#   Reserved.
654
====================
656
====================
(-)a/testsuite/expect/test28.5 (-2 / +2 lines)
Lines 54-60 proc hold_job {job} { Link Here
54
	expect {
54
	expect {
55
		timeout {
55
		timeout {
56
			send_user "\nFAILURE: scontrol not responding\n"
56
			send_user "\nFAILURE: scontrol not responding\n"
57
			set exit_code 0
57
			set exit_code 1
58
		}
58
		}
59
		eof {
59
		eof {
60
			wait
60
			wait
Lines 70-76 proc release_job {job} { Link Here
70
	expect {
70
	expect {
71
		timeout {
71
		timeout {
72
			send_user "\nFAILURE: scontrol not responding\n"
72
			send_user "\nFAILURE: scontrol not responding\n"
73
			set exit_code 0
73
			set exit_code 1
74
		}
74
		}
75
		eof {
75
		eof {
76
			wait
76
			wait
(-)a/testsuite/expect/test28.6 (+242 lines)
Line 0 Link Here
1
#!/usr/bin/expect
2
############################################################################
3
# Purpose: Test of SLURM functionality
4
#          test that when a job array is submitted to multiple
5
#          partitions, the jobs run on all the assigned partitions
6
#
7
#
8
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
9
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
10
#          anything else indicates a failure mode that must be investigated.
11
############################################################################
12
# Copyright (C) 2011-2013 SchedMD LLC
13
# Written by Nathan Yee <nyee32@schedmd.com>
14
#
15
# This file is part of SLURM, a resource management program.
16
# For details, see <http://slurm.schedmd.com/>.
17
# Please also read the included file: DISCLAIMER.
18
#
19
# SLURM is free software; you can redistribute it and/or modify it under
20
# the terms of the GNU General Public License as published by the Free
21
# Software Foundation; either version 2 of the License, or (at your option)
22
# any later version.
23
#
24
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
25
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
27
# details.
28
#
29
# You should have received a copy of the GNU General Public License along
30
# with SLURM; if not, write to the Free Software Foundation, Inc.,
31
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
32
############################################################################
33
source ./globals

# Test configuration and state shared by the rest of the script
set test_id       "test28.6"
set node_list     ""
set job_id_1      0
set job_id_2      0
set def_part      ""
set test_part     "$test_id\_part"
set script        "$test_id\_script"
set array_size    2
set exit_code     0

print_header $test_id

# Skip the test (exit 0) when the value reported by get_array_config does
# not exceed array_size. The "if" condition is already an expression, so no
# nested, unbraced [expr ...] is needed.
if {[get_array_config] < ($array_size + 1)} {
    send_user "\nWARNING: MaxArraySize is too small for this test\n"
    exit 0
}

# Batch script run by every array task: sleep for 100 seconds so the tasks
# are still present when their state is checked
make_bash_script $script "
sleep 100
"
55
56
#
# check_job - verify that every task of a job array is running
#
# job_id: base job id of the array. Tasks ${job_id}_0 through
#         ${job_id}_(array_size-1) are each queried with
#         "scontrol show job".
#
# Sets the global exit_code to 1 when scontrol times out or when a task
# does not report "JobState=RUNNING" exactly once.
#
proc check_job { job_id } {

    global scontrol array_size exit_code

    for {set index 0} {$index < $array_size} {incr index} {

	set matches 0
	spawn $scontrol show job $job_id\_$index
	expect {
	    -re "JobState=RUNNING" {
		incr matches
		# Keep reading so the eof branch runs and the spawned
		# scontrol process is reaped by "wait"
		exp_continue
	    }
	    timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	    }
	    eof {
		wait
	    }
	}
	if {$matches != 1} {
	    send_user "\nFAILURE: job $job_id\_$index was not found\n"
	    set exit_code 1
	}

    }

}
84
85
#
# Get the default partition: the partition name that sinfo prints
# followed by "*"
#
spawn $sinfo -h -o %P
expect {
    -re "($alpha_numeric_under)(\\*)" {
	set def_part $expect_out(1,string)
	# Keep reading so the eof branch runs and sinfo is reaped
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sinfo is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

if {[string compare $def_part ""] == 0} {
    send_user "\nFAILURE: default partition was not found\n"
    set exit_code 1
}
108
109
#
# Look up the node list of the default partition; it is reused below
# as the node list of the test partition
#
spawn $sinfo -h -o "%N" -p $def_part
expect {
    -re "($alpha_numeric_nodelist)" {
	set node_list $expect_out(1,string)
    }
    timeout {
	send_user "\nFAILURE: sinfo is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

# An empty node list means sinfo printed nothing that matched
if {[string length $node_list] == 0} {
    send_user "\nFAILURE: node list was not found\n"
    set exit_code 1
}
130
131
#
# Create the test partition on the nodes found in the default partition
#
spawn $scontrol create PartitionName=$test_part Nodes=$node_list
expect {
    -re "error" {
	send_user "\nFAILURE: scontrol did not create partition\n"
	set exit_code 1
    }
    timeout {
	send_user "\nFAILURE: scontrol is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}
148
	
149
#
# Submit array job on default partition
#
spawn $sbatch -N1 -o /dev/null -e /dev/null --array=0-[expr {$array_size - 1}] --partition=$def_part $script
expect {
    -re "Submitted batch job ($number)" {
	set job_id_1 $expect_out(1,string)
	# Keep reading so the eof branch runs and sbatch is reaped
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sbatch is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

# This check must follow the expect command. Placed inside the expect body
# it would be parsed as extra pattern/action words and never run as a check.
if {$job_id_1 == 0} {
    send_user "\nFAILURE: sbatch did not submit job\n"
    set exit_code 1
}
171
172
#
# Submit array job on new partition
#
spawn $sbatch -N1 -o /dev/null -e /dev/null --array=0-[expr {$array_size - 1}] --partition=$test_part $script
expect {
    -re "Submitted batch job ($number)" {
	set job_id_2 $expect_out(1,string)
	# Keep reading so the eof branch runs and sbatch is reaped
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sbatch is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

# This check must follow the expect command. Placed inside the expect body
# it would be parsed as extra pattern/action words and never run as a check.
if {$job_id_2 == 0} {
    send_user "\nFAILURE: sbatch did not submit job\n"
    set exit_code 1
}
194
195
#
# Give both job arrays a fixed 10 seconds to start
#
sleep 10

#
# Every task of both arrays must be running: first the array on the
# default partition, then the one on the new partition
#
foreach array_job_id [list $job_id_1 $job_id_2] {
    check_job $array_job_id
}

#
# Cancel the jobs on the new partition first, then the jobs on the
# default partition
#
foreach array_job_id [list $job_id_2 $job_id_1] {
    cancel_job $array_job_id
}
218
219
#
# Remove the partition that was created for this test
#
spawn $scontrol delete partition=$test_part
expect {
    -re "error" {
	send_user "\nFAILURE: partition was not deleted\n"
	set exit_code 1
    }
    timeout {
	send_user "\nFAILURE: scontrol is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

# Only a clean run removes the generated batch script and reports success;
# the exit status always reflects exit_code
if {!$exit_code} {
    exec $bin_rm -f $script
    send_user "\nSUCCESS\n"
}
exit $exit_code

Return to ticket 434