View | Details | Raw Unified | Return to ticket 447 | Differences between
and this patch

Collapse All | Expand All

(-)a/src/plugins/proctrack/cgroup/proctrack_cgroup.c (-11 / +15 lines)
Lines 109-114 const uint32_t plugin_version = 91; Link Here
109
#define PATH_MAX 256
109
#define PATH_MAX 256
110
#endif
110
#endif
111
111
112
/* One slurmstepd could be in the process of creating cgroups while another
113
 * slurmstepd is simultaneously deleting cgroups for another step for that
114
 * same user. MAX_CGROUP_RETRY defines how many times we retry creating
115
 * the user and job cgroup on an error of ENOENT. */
116
#ifndef MAX_CGROUP_RETRY
117
#define MAX_CGROUP_RETRY 8
118
#endif
119
112
static slurm_cgroup_conf_t slurm_cgroup_conf;
120
static slurm_cgroup_conf_t slurm_cgroup_conf;
113
121
114
static char user_cgroup_path[PATH_MAX];
122
static char user_cgroup_path[PATH_MAX];
Lines 143-148 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
143
{
151
{
144
	/* we do it here as we do not have access to the conf structure */
152
	/* we do it here as we do not have access to the conf structure */
145
	/* in libslurm (src/common/xcgroup.c) */
153
	/* in libslurm (src/common/xcgroup.c) */
154
	int retry_count = 0;	/* See MAX_CGROUP_RETRY description above */
146
	xcgroup_t slurm_cg;
155
	xcgroup_t slurm_cg;
147
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
156
	char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend);
148
#ifdef MULTIPLE_SLURMD
157
#ifdef MULTIPLE_SLURMD
Lines 216-226 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
216
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
225
			    getuid(), getgid()) != XCGROUP_SUCCESS) {
217
		return SLURM_ERROR;
226
		return SLURM_ERROR;
218
	}
227
	}
219
	if (xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) {
220
		xcgroup_destroy(&user_freezer_cg);
221
222
		return SLURM_ERROR;
223
	}
224
228
225
	/* create job cgroup in the freezer ns (it could already exist) */
229
	/* create job cgroup in the freezer ns (it could already exist) */
226
	if (xcgroup_create(&freezer_ns, &job_freezer_cg,
230
	if (xcgroup_create(&freezer_ns, &job_freezer_cg,
Lines 229-239 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
229
		xcgroup_destroy(&user_freezer_cg);
233
		xcgroup_destroy(&user_freezer_cg);
230
		return SLURM_ERROR;
234
		return SLURM_ERROR;
231
	}
235
	}
232
	if (xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) {
233
		xcgroup_destroy(&user_freezer_cg);
234
		xcgroup_destroy(&job_freezer_cg);
235
		return SLURM_ERROR;
236
	}
237
236
238
	/* create step cgroup in the freezer ns (it should not exist) */
237
	/* create step cgroup in the freezer ns (it should not exist) */
239
	if (xcgroup_create(&freezer_ns, &step_freezer_cg,
238
	if (xcgroup_create(&freezer_ns, &step_freezer_cg,
Lines 243-249 int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid) Link Here
243
		xcgroup_destroy(&job_freezer_cg);
242
		xcgroup_destroy(&job_freezer_cg);
244
		return SLURM_ERROR;
243
		return SLURM_ERROR;
245
	}
244
	}
246
	if (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS) {
245
246
retry:	if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
247
	    (xcgroup_instanciate(&job_freezer_cg)  != XCGROUP_SUCCESS) ||
248
	    (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) {
249
		if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY))
250
			goto retry;
247
		xcgroup_destroy(&user_freezer_cg);
251
		xcgroup_destroy(&user_freezer_cg);
248
		xcgroup_destroy(&job_freezer_cg);
252
		xcgroup_destroy(&job_freezer_cg);
249
		xcgroup_destroy(&step_freezer_cg);
253
		xcgroup_destroy(&step_freezer_cg);

Return to ticket 447