View | Details | Raw Unified | Return to ticket 447 | Differences between
and this patch

Collapse All | Expand All

(-)a/NEWS (+3 lines)
Lines 13-18 documents those changes that are of interest to users and admins. Link Here
13
    systems.
13
    systems.
14
 -- Substantial performance improvement for systems with Shared=YES or FORCE
14
 -- Substantial performance improvement for systems with Shared=YES or FORCE
15
    and large numbers of running jobs (replace bubble sort with quick sort).
15
    and large numbers of running jobs (replace bubble sort with quick sort).
16
 -- proctrack/cgroup - Add locking to prevent race condition where one job step
17
    is ending for a user or job at the same time another job step is starting
18
    and the user or job container is deleted from under the starting job step.
16
19
17
* Changes in Slurm 2.6.4
20
* Changes in Slurm 2.6.4
18
========================
21
========================
(-)a/src/plugins/proctrack/cgroup/proctrack_cgroup.c (-19 / +30 lines)
Lines 109-122 const uint32_t plugin_version = 91; Link Here
109
#define PATH_MAX 256
109
#define PATH_MAX 256
110
#endif
110
#endif
111
111
112
/* One slurmstepd could be in the process of creating cgroups while another
113
 * slurmstepd is simultaneously deleting cgroups for another step for that
114
 * same user. MAX_CGROUP_RETRY defines how many times that we retry creating
115
 * the user and job cgroup on an error of ENOENT. */
116
#ifndef MAX_CGROUP_RETRY
117
#define MAX_CGROUP_RETRY 8
118
#endif
119
120
static slurm_cgroup_conf_t slurm_cgroup_conf;
112
static slurm_cgroup_conf_t slurm_cgroup_conf;
121
113
122
static char user_cgroup_path[PATH_MAX];
114
static char user_cgroup_path[PATH_MAX];
Lines 125-130 static char jobstep_cgroup_path[PATH_MAX]; Link Here
125
117
126
static xcgroup_ns_t freezer_ns;
118
static xcgroup_ns_t freezer_ns;
127
119
120
static bool slurm_freezer_init = false;
121
static xcgroup_t slurm_freezer_cg;
128
static xcgroup_t user_freezer_cg;
122
static xcgroup_t user_freezer_cg;
129
static xcgroup_t job_freezer_cg;
123
static xcgroup_t job_freezer_cg;
130
static xcgroup_t step_freezer_cg;
124
static xcgroup_t step_freezer_cg;
Lines 151-158 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
151
{
145
{
152
	/* we do it here as we do not have access to the conf structure */
146
	/* we do it here as we do not have access to the conf structure */
153
	/* in libslurm (src/common/xcgroup.c) */
147
	/* in libslurm (src/common/xcgroup.c) */
154
	int retry_count = 0;	/* See MAX_CGROUP_RETRY description above */
155
	xcgroup_t slurm_cg;
156
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
148
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
157
#ifdef MULTIPLE_SLURMD
149
#ifdef MULTIPLE_SLURMD
158
	if ( conf->node_name != NULL )
150
	if ( conf->node_name != NULL )
Lines 164-179 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
164
#endif
156
#endif
165
157
166
	/* create slurm cgroup in the freezer ns (it could already exist) */
158
	/* create slurm cgroup in the freezer ns (it could already exist) */
167
	if (xcgroup_create(&freezer_ns, &slurm_cg,pre,
159
	if (xcgroup_create(&freezer_ns, &slurm_freezer_cg, pre,
168
			   getuid(), getgid()) != XCGROUP_SUCCESS) {
160
			   getuid(), getgid()) != XCGROUP_SUCCESS) {
169
		return SLURM_ERROR;
161
		return SLURM_ERROR;
170
	}
162
	}
171
	if (xcgroup_instanciate(&slurm_cg) != XCGROUP_SUCCESS) {
163
	if (xcgroup_instanciate(&slurm_freezer_cg) != XCGROUP_SUCCESS) {
172
		xcgroup_destroy(&slurm_cg);
164
		xcgroup_destroy(&slurm_freezer_cg);
173
		return SLURM_ERROR;
165
		return SLURM_ERROR;
174
	}
166
	}
175
	else
176
		xcgroup_destroy(&slurm_cg);
177
167
178
	/* build user cgroup relative path if not set (should not be) */
168
	/* build user cgroup relative path if not set (should not be) */
179
	if (*user_cgroup_path == '\0') {
169
	if (*user_cgroup_path == '\0') {
Lines 182-187 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
182
			error("unable to build uid %u cgroup relative "
172
			error("unable to build uid %u cgroup relative "
183
			      "path : %m", uid);
173
			      "path : %m", uid);
184
			xfree(pre);
174
			xfree(pre);
175
			xcgroup_destroy(&slurm_freezer_cg);
185
			return SLURM_ERROR;
176
			return SLURM_ERROR;
186
		}
177
		}
187
	}
178
	}
Lines 193-198 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
193
			      user_cgroup_path, job->jobid) >= PATH_MAX) {
184
			      user_cgroup_path, job->jobid) >= PATH_MAX) {
194
			error("unable to build job %u cgroup relative "
185
			error("unable to build job %u cgroup relative "
195
			      "path : %m", job->jobid);
186
			      "path : %m", job->jobid);
187
			xcgroup_destroy(&slurm_freezer_cg);
196
			return SLURM_ERROR;
188
			return SLURM_ERROR;
197
		}
189
		}
198
	}
190
	}
Lines 206-211 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
206
				error("proctrack/cgroup unable to build job step"
198
				error("proctrack/cgroup unable to build job step"
207
				      " %u.batch freezer cg relative path: %m",
199
				      " %u.batch freezer cg relative path: %m",
208
				      job->jobid);
200
				      job->jobid);
201
				xcgroup_destroy(&slurm_freezer_cg);
209
				return SLURM_ERROR;
202
				return SLURM_ERROR;
210
			}
203
			}
211
		} else {
204
		} else {
Lines 214-219 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
214
				error("proctrack/cgroup unable to build job step"
207
				error("proctrack/cgroup unable to build job step"
215
				      " %u.%u freezer cg relative path: %m",
208
				      " %u.%u freezer cg relative path: %m",
216
				      job->jobid, job->stepid);
209
				      job->jobid, job->stepid);
210
				xcgroup_destroy(&slurm_freezer_cg);
217
				return SLURM_ERROR;
211
				return SLURM_ERROR;
218
			}
212
			}
219
		}
213
		}
Lines 223-228 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
223
	if (xcgroup_create(&freezer_ns, &user_freezer_cg,
217
	if (xcgroup_create(&freezer_ns, &user_freezer_cg,
224
			    user_cgroup_path,
218
			    user_cgroup_path,
225
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
219
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
220
		xcgroup_destroy(&slurm_freezer_cg);
226
		return SLURM_ERROR;
221
		return SLURM_ERROR;
227
	}
222
	}
228
223
Lines 230-235 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
230
	if (xcgroup_create(&freezer_ns, &job_freezer_cg,
225
	if (xcgroup_create(&freezer_ns, &job_freezer_cg,
231
			    job_cgroup_path,
226
			    job_cgroup_path,
232
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
227
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
228
		xcgroup_destroy(&slurm_freezer_cg);
233
		xcgroup_destroy(&user_freezer_cg);
229
		xcgroup_destroy(&user_freezer_cg);
234
		return SLURM_ERROR;
230
		return SLURM_ERROR;
235
	}
231
	}
Lines 238-258 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
238
	if (xcgroup_create(&freezer_ns, &step_freezer_cg,
234
	if (xcgroup_create(&freezer_ns, &step_freezer_cg,
239
			    jobstep_cgroup_path,
235
			    jobstep_cgroup_path,
240
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
236
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
237
		xcgroup_destroy(&slurm_freezer_cg);
241
		xcgroup_destroy(&user_freezer_cg);
238
		xcgroup_destroy(&user_freezer_cg);
242
		xcgroup_destroy(&job_freezer_cg);
239
		xcgroup_destroy(&job_freezer_cg);
243
		return SLURM_ERROR;
240
		return SLURM_ERROR;
244
	}
241
	}
245
242
246
retry:	if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
243
	xcgroup_lock(&slurm_freezer_cg);
244
	if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
247
	    (xcgroup_instanciate(&job_freezer_cg)  != XCGROUP_SUCCESS) ||
245
	    (xcgroup_instanciate(&job_freezer_cg)  != XCGROUP_SUCCESS) ||
248
	    (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) {
246
	    (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) {
249
		if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY))
247
		xcgroup_unlock(&slurm_freezer_cg);
250
			goto retry;
248
		xcgroup_destroy(&slurm_freezer_cg);
251
		xcgroup_destroy(&user_freezer_cg);
249
		xcgroup_destroy(&user_freezer_cg);
252
		xcgroup_destroy(&job_freezer_cg);
250
		xcgroup_destroy(&job_freezer_cg);
253
		xcgroup_destroy(&step_freezer_cg);
251
		xcgroup_destroy(&step_freezer_cg);
254
		return SLURM_ERROR;
252
		return SLURM_ERROR;
255
	}
253
	}
254
	xcgroup_unlock(&slurm_freezer_cg);
255
	slurm_freezer_init = true;
256
256
257
	/* inhibit release agent for the step cgroup thus letting 
257
	/* inhibit release agent for the step cgroup thus letting 
258
	 * slurmstepd being able to add new pids to the container 
258
	 * slurmstepd being able to add new pids to the container 
Lines 264-272 retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || Link Here
264
264
265
int _slurm_cgroup_destroy(void)
265
int _slurm_cgroup_destroy(void)
266
{
266
{
267
	if (slurm_freezer_init)
268
		xcgroup_lock(&slurm_freezer_cg);
269
267
	if (jobstep_cgroup_path[0] != '\0') {
270
	if (jobstep_cgroup_path[0] != '\0') {
268
		if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS )
271
		if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS ) {
272
			if (slurm_freezer_init)
273
				xcgroup_unlock(&slurm_freezer_cg);
269
			return SLURM_ERROR;
274
			return SLURM_ERROR;
275
		}
270
		xcgroup_destroy(&step_freezer_cg);
276
		xcgroup_destroy(&step_freezer_cg);
271
	}
277
	}
272
278
Lines 280-285 int _slurm_cgroup_destroy(void) Link Here
280
		xcgroup_destroy(&user_freezer_cg);
286
		xcgroup_destroy(&user_freezer_cg);
281
	}
287
	}
282
288
289
	if (slurm_freezer_init) {
290
		xcgroup_unlock(&slurm_freezer_cg);
291
		xcgroup_destroy(&slurm_freezer_cg);
292
	}
293
283
	xcgroup_ns_destroy(&freezer_ns);
294
	xcgroup_ns_destroy(&freezer_ns);
284
295
285
	return SLURM_SUCCESS;
296
	return SLURM_SUCCESS;

Return to ticket 447