|
Lines 109-122
const uint32_t plugin_version = 91;
Link Here
|
| 109 |
#define PATH_MAX 256 |
109 |
#define PATH_MAX 256 |
| 110 |
#endif |
110 |
#endif |
| 111 |
|
111 |
|
| 112 |
/* One slurmstepd could be in the process of creating cgroups while another |
|
|
| 113 |
* slurmstepd is simultaneously deleting cgroups for another step for that |
| 114 |
* same user. MAX_CGROUP_RETRY defines how many times that we retry creating |
| 115 |
* the user and job cgroup on an error of ENOENT. */ |
| 116 |
#ifndef MAX_CGROUP_RETRY |
| 117 |
#define MAX_CGROUP_RETRY 8 |
| 118 |
#endif |
| 119 |
|
| 120 |
static slurm_cgroup_conf_t slurm_cgroup_conf; |
112 |
static slurm_cgroup_conf_t slurm_cgroup_conf; |
| 121 |
|
113 |
|
| 122 |
static char user_cgroup_path[PATH_MAX]; |
114 |
static char user_cgroup_path[PATH_MAX]; |
|
Lines 151-157
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 151 |
{ |
143 |
{ |
| 152 |
/* we do it here as we do not have access to the conf structure */ |
144 |
/* we do it here as we do not have access to the conf structure */ |
| 153 |
/* in libslurm (src/common/xcgroup.c) */ |
145 |
/* in libslurm (src/common/xcgroup.c) */ |
| 154 |
int retry_count = 0; /* See MAX_CGROUP_RETRY description above */ |
|
|
| 155 |
xcgroup_t slurm_cg; |
146 |
xcgroup_t slurm_cg; |
| 156 |
char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend); |
147 |
char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend); |
| 157 |
#ifdef MULTIPLE_SLURMD |
148 |
#ifdef MULTIPLE_SLURMD |
|
Lines 243-258
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 243 |
return SLURM_ERROR; |
234 |
return SLURM_ERROR; |
| 244 |
} |
235 |
} |
| 245 |
|
236 |
|
| 246 |
retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || |
237 |
xcgroup_lock(&user_freezer_cg); |
|
|
238 |
if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || |
| 247 |
(xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) || |
239 |
(xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) || |
| 248 |
(xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) { |
240 |
(xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) { |
| 249 |
if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY)) |
241 |
xcgroup_unlock(&user_freezer_cg); |
| 250 |
goto retry; |
|
|
| 251 |
xcgroup_destroy(&user_freezer_cg); |
242 |
xcgroup_destroy(&user_freezer_cg); |
| 252 |
xcgroup_destroy(&job_freezer_cg); |
243 |
xcgroup_destroy(&job_freezer_cg); |
| 253 |
xcgroup_destroy(&step_freezer_cg); |
244 |
xcgroup_destroy(&step_freezer_cg); |
| 254 |
return SLURM_ERROR; |
245 |
return SLURM_ERROR; |
| 255 |
} |
246 |
} |
|
|
247 |
xcgroup_unlock(&user_freezer_cg); |
| 256 |
|
248 |
|
| 257 |
/* inhibit release agent for the step cgroup thus letting |
249 |
/* inhibit release agent for the step cgroup thus letting |
| 258 |
* slurmstepd being able to add new pids to the container |
250 |
* slurmstepd being able to add new pids to the container |
|
Lines 264-272
retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) ||
Link Here
|
| 264 |
|
256 |
|
| 265 |
int _slurm_cgroup_destroy(void) |
257 |
int _slurm_cgroup_destroy(void) |
| 266 |
{ |
258 |
{ |
|
|
259 |
xcgroup_lock(&user_freezer_cg); |
| 267 |
if (jobstep_cgroup_path[0] != '\0') { |
260 |
if (jobstep_cgroup_path[0] != '\0') { |
| 268 |
if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS ) |
261 |
if ( xcgroup_delete(&step_freezer_cg) != XCGROUP_SUCCESS ) { |
|
|
262 |
xcgroup_unlock(&user_freezer_cg); |
| 269 |
return SLURM_ERROR; |
263 |
return SLURM_ERROR; |
|
|
264 |
} |
| 270 |
xcgroup_destroy(&step_freezer_cg); |
265 |
xcgroup_destroy(&step_freezer_cg); |
| 271 |
} |
266 |
} |
| 272 |
|
267 |
|
|
Lines 279-285
int _slurm_cgroup_destroy(void)
Link Here
|
| 279 |
xcgroup_delete(&user_freezer_cg); |
274 |
xcgroup_delete(&user_freezer_cg); |
| 280 |
xcgroup_destroy(&user_freezer_cg); |
275 |
xcgroup_destroy(&user_freezer_cg); |
| 281 |
} |
276 |
} |
| 282 |
|
277 |
xcgroup_unlock(&user_freezer_cg); |
| 283 |
xcgroup_ns_destroy(&freezer_ns); |
278 |
xcgroup_ns_destroy(&freezer_ns); |
| 284 |
|
279 |
|
| 285 |
return SLURM_SUCCESS; |
280 |
return SLURM_SUCCESS; |