View | Details | Raw Unified | Return to ticket 434 | Differences between
and this patch

Collapse All | Expand All

(-)a/testsuite/expect/Makefile.am (+1 lines)
Lines 444-449 EXTRA_DIST = \ Link Here
444
	test28.3                        \
444
	test28.3                        \
445
	test28.4                        \
445
	test28.4                        \
446
	test28.5                        \
446
	test28.5                        \
447
	test28.6			\
447
	test30.1			\
448
	test30.1			\
448
	test31.1			\
449
	test31.1			\
449
	usleep
450
	usleep
(-)a/testsuite/expect/Makefile.in (+1 lines)
Lines 783-788 EXTRA_DIST = \ Link Here
783
	test28.3                        \
783
	test28.3                        \
784
	test28.4                        \
784
	test28.4                        \
785
	test28.5                        \
785
	test28.5                        \
786
	test28.6			\
786
	test30.1			\
787
	test30.1			\
787
	test31.1			\
788
	test31.1			\
788
	usleep
789
	usleep
(-)a/testsuite/expect/README (+2 lines)
Lines 649-654 test28.4 Validates scontrol update command for a job array with Link Here
649
	   a job array index and the whole job array.
649
	   a job array index and the whole job array.
650
test28.5   Validates that scontrol can hold and release a whole job
650
test28.5   Validates that scontrol can hold and release a whole job
651
	   array or an individual jobid index.
651
	   array or an individual jobid index.
652
test28.6   Validates that when a job array is submitted to multiple
653
	   partitions that the jobs run on them.
652
654
653
test29.#   Reserved.
655
test29.#   Reserved.
654
====================
656
====================
(-)a/testsuite/expect/test28.5 (-2 / +2 lines)
Lines 54-60 proc hold_job {job} { Link Here
54
	expect {
54
	expect {
55
		timeout {
55
		timeout {
56
			send_user "\nFAILURE: scontrol not responding\n"
56
			send_user "\nFAILURE: scontrol not responding\n"
57
			set exit_code 0
57
			set exit_code 1
58
		}
58
		}
59
		eof {
59
		eof {
60
			wait
60
			wait
Lines 70-76 proc release_job {job} { Link Here
70
	expect {
70
	expect {
71
		timeout {
71
		timeout {
72
			send_user "\nFAILURE: scontrol not responding\n"
72
			send_user "\nFAILURE: scontrol not responding\n"
73
			set exit_code 0
73
			set exit_code 1
74
		}
74
		}
75
		eof {
75
		eof {
76
			wait
76
			wait
(-)a/testsuite/expect/test28.6 (+245 lines)
Line 0 Link Here
1
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
#          test that when a job array is submitted to multiple
#          partitions that the jobs run on all the assigned partitions
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2011-2013 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of SLURM, a resource management program.
# For details, see <http://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set test_id       "test28.6"
set node_list     ""
set job_id_1      0
set job_id_2      0
set def_part      ""
set test_part     "$test_id\_part"
set script        "$test_id\_script"
set array_size    2
set exit_code     0

print_header $test_id

# Skip the test when the configured MaxArraySize cannot hold an array of
# $array_size tasks (indices 0 .. array_size-1 require MaxArraySize > array_size).
# BUG FIX: message read "is to small"; corrected to "is too small".
if {[get_array_config] < [expr $array_size + 1]} {
    send_user "\nWARNING: MaxArraySize is too small for this test\n"
    exit 0
}

# Batch script that just keeps each array task alive long enough for the
# RUNNING-state checks below to observe it.
make_bash_script $script "
sleep 100
"
55
56
# Verify that every task of job array $job_id (indices 0 .. array_size-1)
# is in the RUNNING state, as reported by "scontrol show job <id>_<index>".
# Sets the global exit_code to 1 when scontrol fails to respond or when a
# task is not found in the RUNNING state.
proc check_job { job_id } {

    global scontrol array_size number exit_code

    for {set index 0} {$index<$array_size} {incr index} {

	set matches 0
	spawn $scontrol show job $job_id\_$index
	expect {
	    -re "JobState=RUNNING" {
		incr matches
		exp_continue
	    }
	    timeout {
		# BUG FIX: message read "FAILURE scontrol ..." — missing
		# the ":" used by every other failure message in this file.
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	    }
	    eof {
		wait
	    }
	}
	# A timeout also leaves matches at 0, so this reports both a
	# missing task and a non-RUNNING task.
	if {$matches != 1} {
	    send_user "\nFAILURE: job $job_id\_$index was not found\n"
	    set exit_code 1
	}
    }
}
85
86
#
# Get the default partition (sinfo marks it with a trailing "*")
#
spawn $sinfo -h -o %P
expect {
    -re "($alpha_numeric_under)(\\*)" {
	set def_part $expect_out(1,string)
	exp_continue
    }
    timeout {
	# BUG FIX: message said "scontrol is not responding" but the
	# spawned command is sinfo.
	send_user "\nFAILURE: sinfo is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

if {[string compare $def_part ""] == 0} {
    send_user "\nFAILURE: default partition was not found\n"
    set exit_code 1
}

#
# Get the available nodes from the default partition
#
spawn $sinfo -h -o "%N" -p $def_part
expect {
    -re "($alpha_numeric_nodelist)" {
	set node_list $expect_out(1,string)
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sinfo is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

if {[string compare $node_list ""] == 0} {
    send_user "\nFAILURE: node list was not found\n"
    set exit_code 1
}

#
# Create a second partition covering the same nodes
#
spawn $scontrol create PartitionName=$test_part Nodes=$node_list
expect {
    -re "error" {
	# BUG FIX: original read `send_user"\nFAILURE..."` with no space
	# after the command name — a Tcl "invalid command name" error.
	send_user "\nFAILURE: scontrol did not create partition\n"
	set exit_code 1
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: scontrol is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

#
# Submit array job on default partition
#
spawn $sbatch -N1 -o /dev/null -e /dev/null --array=0-[expr $array_size -1] --partition=$def_part $script
expect {
    -re "Submitted batch job ($number)" {
	set job_id_1 $expect_out(1,string)
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sbatch is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}
# BUG FIX: this check was inside the expect block above, where expect
# would misparse it as a pattern-action pair; it must follow the expect.
if {$job_id_1 == 0} {
    send_user "\nFAILURE: sbatch did not submit job\n"
    set exit_code 1
}

#
# Submit array job on new partition
#
spawn $sbatch -N1 -o /dev/null -e /dev/null --array=0-[expr $array_size -1] --partition=$test_part $script
expect {
    -re "Submitted batch job ($number)" {
	set job_id_2 $expect_out(1,string)
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: sbatch is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}
# BUG FIX: moved out of the expect block (same issue as job_id_1 above).
if {$job_id_2 == 0} {
    send_user "\nFAILURE: sbatch did not submit job\n"
    set exit_code 1
}

#
# Wait for the jobs to start
#
sleep 10

#
# Check that all array tasks of both jobs are running
#
check_job $job_id_1

check_job $job_id_2

#
# Cancel the jobs that are running on the new partition
#
cancel_job $job_id_2

#
# Cancel jobs that are running on the default partition
#
cancel_job $job_id_1

#
# Delete the new partition
#
spawn $scontrol delete partition=$test_part
expect {
    -re "error" {
	send_user "\nFAILURE: partition was not deleted\n"
	set exit_code 1
	exp_continue
    }
    timeout {
	send_user "\nFAILURE: scontrol is not responding\n"
	set exit_code 1
    }
    eof {
	wait
    }
}

if {$exit_code == 0} {
    exec $bin_rm -f $script
    send_user "\nSUCCESS\n"
}
exit $exit_code

Return to ticket 434