|
Lines 109-114
const uint32_t plugin_version = 91;
Link Here
|
| 109 |
#define PATH_MAX 256 |
109 |
#define PATH_MAX 256 |
| 110 |
#endif |
110 |
#endif |
| 111 |
|
111 |
|
|
|
112 |
/* One slurmstepd could be in the process of creating cgroups while another |
| 113 |
* slurmstepd is simultaneously deleting cgroups for another step for that |
| 114 |
* same user. MAX_CGROUP_RETRY defines how many times that we retry creating |
| 115 |
* the user and job cgroup on an error of ENOENT. */ |
| 116 |
#ifndef MAX_CGROUP_RETRY |
| 117 |
#define MAX_CGROUP_RETRY 8 |
| 118 |
#endif |
| 119 |
|
| 112 |
static slurm_cgroup_conf_t slurm_cgroup_conf; |
120 |
static slurm_cgroup_conf_t slurm_cgroup_conf; |
| 113 |
|
121 |
|
| 114 |
static char user_cgroup_path[PATH_MAX]; |
122 |
static char user_cgroup_path[PATH_MAX]; |
|
Lines 143-148
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 143 |
{ |
151 |
{ |
| 144 |
/* we do it here as we do not have access to the conf structure */ |
152 |
/* we do it here as we do not have access to the conf structure */ |
| 145 |
/* in libslurm (src/common/xcgroup.c) */ |
153 |
/* in libslurm (src/common/xcgroup.c) */ |
|
|
154 |
int retry_count = 0; /* See MAX_CGROUP_RETRY description above */ |
| 146 |
xcgroup_t slurm_cg; |
155 |
xcgroup_t slurm_cg; |
| 147 |
char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend); |
156 |
char* pre = (char*) xstrdup(slurm_cgroup_conf.cgroup_prepend); |
| 148 |
#ifdef MULTIPLE_SLURMD |
157 |
#ifdef MULTIPLE_SLURMD |
|
Lines 216-226
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 216 |
getuid(), getgid()) != XCGROUP_SUCCESS) { |
225 |
getuid(), getgid()) != XCGROUP_SUCCESS) { |
| 217 |
return SLURM_ERROR; |
226 |
return SLURM_ERROR; |
| 218 |
} |
227 |
} |
| 219 |
if (xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) { |
|
|
| 220 |
xcgroup_destroy(&user_freezer_cg); |
| 221 |
|
| 222 |
return SLURM_ERROR; |
| 223 |
} |
| 224 |
|
228 |
|
| 225 |
/* create job cgroup in the freezer ns (it could already exist) */ |
229 |
/* create job cgroup in the freezer ns (it could already exist) */ |
| 226 |
if (xcgroup_create(&freezer_ns, &job_freezer_cg, |
230 |
if (xcgroup_create(&freezer_ns, &job_freezer_cg, |
|
Lines 229-239
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 229 |
xcgroup_destroy(&user_freezer_cg); |
233 |
xcgroup_destroy(&user_freezer_cg); |
| 230 |
return SLURM_ERROR; |
234 |
return SLURM_ERROR; |
| 231 |
} |
235 |
} |
| 232 |
if (xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) { |
|
|
| 233 |
xcgroup_destroy(&user_freezer_cg); |
| 234 |
xcgroup_destroy(&job_freezer_cg); |
| 235 |
return SLURM_ERROR; |
| 236 |
} |
| 237 |
|
236 |
|
| 238 |
/* create step cgroup in the freezer ns (it should not exist) */ |
237 |
/* create step cgroup in the freezer ns (it should not exist) */ |
| 239 |
if (xcgroup_create(&freezer_ns, &step_freezer_cg, |
238 |
if (xcgroup_create(&freezer_ns, &step_freezer_cg, |
|
Lines 243-249
int _slurm_cgroup_create(slurmd_job_t *job, uint64_t id, uid_t uid, gid_t gid)
Link Here
|
| 243 |
xcgroup_destroy(&job_freezer_cg); |
242 |
xcgroup_destroy(&job_freezer_cg); |
| 244 |
return SLURM_ERROR; |
243 |
return SLURM_ERROR; |
| 245 |
} |
244 |
} |
| 246 |
if (xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS) { |
245 |
|
|
|
246 |
retry: if ((xcgroup_instanciate(&user_freezer_cg) != XCGROUP_SUCCESS) || |
| 247 |
(xcgroup_instanciate(&job_freezer_cg) != XCGROUP_SUCCESS) || |
| 248 |
(xcgroup_instanciate(&step_freezer_cg) != XCGROUP_SUCCESS)) { |
| 249 |
if ((errno == ENOENT) && (++retry_count <= MAX_CGROUP_RETRY)) |
| 250 |
goto retry; |
| 247 |
xcgroup_destroy(&user_freezer_cg); |
251 |
xcgroup_destroy(&user_freezer_cg); |
| 248 |
xcgroup_destroy(&job_freezer_cg); |
252 |
xcgroup_destroy(&job_freezer_cg); |
| 249 |
xcgroup_destroy(&step_freezer_cg); |
253 |
xcgroup_destroy(&step_freezer_cg); |