View | Details | Raw Unified | Return to ticket 447 | Differences between
and this patch

Collapse All | Expand All

(-)a/src/plugins/proctrack/cgroup/proctrack_cgroup.c (-14 / +9 lines)
Lines 109-122 const uint32_t plugin_version = 91; Link Here
109
#define PATH_MAX 256
109
#define PATH_MAX 256
110
#endif
110
#endif
111
111
112
/* One slurmstepd could be in the process of creating cgroups while another
113
 * slurmstepd is simultaneously deleting cgroups for another step for that
114
 * same user. MAX_CGROUP_RETRY defines how many times that we retry creating
115
 * the user and job cgroup on an error of ENOENT. */
116
#ifndef MAX_CGROUP_RETRY
117
#define MAX_CGROUP_RETRY 8
118
#endif
119
120
static slurm_cgroup_conf_t slurm_cgroup_conf;
112
static slurm_cgroup_conf_t slurm_cgroup_conf;
121
113
122
static char user_cgroup_path[PATH_MAX];
114
static char user_cgroup_path[PATH_MAX];
Lines 151-157 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
151
{
143
{
152
	/* we do it here as we do not have access to the conf structure */
144
	/* we do it here as we do not have access to the conf structure */
153
	/* in libslurm (src/common/xcgroup.c) */
145
	/* in libslurm (src/common/xcgroup.c) */
154
	int retry_count = 0;	/* See MAX_CGROUP_RETRY description above */
155
	xcgroup_t slurm_cg;
146
	xcgroup_t slurm_cg;
156
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
147
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
157
#ifdef MULTIPLE_SLURMD
148
#ifdef MULTIPLE_SLURMD
Lines 243-258 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
243
		return SLURM_ERROR;
234
		return SLURM_ERROR;
244
	}
235
	}
245
236
246
retry:	if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
237
	xcgroup_lock(&user_freezer_cg);
238
	if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
247
	    (xcgroup_instanciate(&job_freezer_cg)  != XCGROUP_SUCCESS) ||
239
	    (xcgroup_instanciate(&job_freezer_cg)  != XCGROUP_SUCCESS) ||
248
	    (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) {
240
	    (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) {
249
		if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY))
241
		xcgroup_unlock(&user_freezer_cg);
250
			goto retry;
251
		xcgroup_destroy(&user_freezer_cg);
242
		xcgroup_destroy(&user_freezer_cg);
252
		xcgroup_destroy(&job_freezer_cg);
243
		xcgroup_destroy(&job_freezer_cg);
253
		xcgroup_destroy(&step_freezer_cg);
244
		xcgroup_destroy(&step_freezer_cg);
254
		return SLURM_ERROR;
245
		return SLURM_ERROR;
255
	}
246
	}
247
	xcgroup_unlock(&user_freezer_cg);
256
248
257
	/* inhibit release agent for the step cgroup thus letting 
249
	/* inhibit release agent for the step cgroup thus letting 
258
	 * slurmstepd be able to add new pids to the container 
250
	 * slurmstepd be able to add new pids to the container 
Lines 264-272 retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || Link Here
264
256
265
int _slurm_cgroup_destroy(void)
257
int _slurm_cgroup_destroy(void)
266
{
258
{
259
	xcgroup_lock(&user_freezer_cg);
267
	if (jobstep_cgroup_path[0] != '\0') {
260
	if (jobstep_cgroup_path[0] != '\0') {
268
		if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS )
261
		if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS ) {
262
			xcgroup_unlock(&user_freezer_cg);
269
			return SLURM_ERROR;
263
			return SLURM_ERROR;
264
		}
270
		xcgroup_destroy(&step_freezer_cg);
265
		xcgroup_destroy(&step_freezer_cg);
271
	}
266
	}
272
267
Lines 279-285 int _slurm_cgroup_destroy(void) Link Here
279
		xcgroup_delete(&user_freezer_cg);
274
		xcgroup_delete(&user_freezer_cg);
280
		xcgroup_destroy(&user_freezer_cg);
275
		xcgroup_destroy(&user_freezer_cg);
281
	}
276
	}
282
277
	xcgroup_unlock(&user_freezer_cg);
283
	xcgroup_ns_destroy(&freezer_ns);
278
	xcgroup_ns_destroy(&freezer_ns);
284
279
285
	return SLURM_SUCCESS;
280
	return SLURM_SUCCESS;

Return to ticket 447