|
Lines 116-151
static uint64_t debug_flags = 0;
Link Here
|
| 116 |
static pthread_mutex_t lustre_lock = PTHREAD_MUTEX_INITIALIZER; |
116 |
static pthread_mutex_t lustre_lock = PTHREAD_MUTEX_INITIALIZER; |
| 117 |
static int tres_pos = -1; |
117 |
static int tres_pos = -1; |
| 118 |
|
118 |
|
| 119 |
/* Default path to lustre stats */ |
|
|
| 120 |
const char proc_base_path[] = "/proc/fs/lustre"; |
| 121 |
|
119 |
|
| 122 |
/** |
120 |
/* _llite_path() |
| 123 |
* is lustre fs supported |
121 |
* |
| 124 |
**/ |
122 |
* returns the path to Lustre clients stats (depends on Lustre version) |
|
|
123 |
* |
| 124 |
*/ |
| 125 |
static char *_llite_path(void) |
| 126 |
{ |
| 127 |
static char llite_path[PATH_MAX]; |
| 128 |
DIR *llite_dir; |
| 129 |
|
| 130 |
// test /proc |
| 131 |
sprintf(llite_path, "/proc/fs/lustre/llite"); |
| 132 |
llite_dir = opendir(llite_path); |
| 133 |
|
| 134 |
if (!llite_dir) { |
| 135 |
debug("%s: unable to open %s %m", __func__, llite_path); |
| 136 |
|
| 137 |
// try /sys |
| 138 |
sprintf(llite_path, "/sys/kernel/debug/lustre/llite"); |
| 139 |
llite_dir = opendir(llite_path); |
| 140 |
if (!llite_dir) { |
| 141 |
debug("%s: unable to open %s %m", __func__, llite_path); |
| 142 |
|
| 143 |
// bail |
| 144 |
return NULL; |
| 145 |
} |
| 146 |
} |
| 147 |
|
| 148 |
closedir(llite_dir); |
| 149 |
return llite_path; |
| 150 |
} |
| 151 |
|
| 152 |
|
| 153 |
/* |
| 154 |
* _check_lustre_fs() |
| 155 |
* |
| 156 |
* check if Lustre is supported |
| 157 |
* |
| 158 |
*/ |
| 125 |
static int _check_lustre_fs(void) |
159 |
static int _check_lustre_fs(void) |
| 126 |
{ |
160 |
{ |
| 127 |
static bool set = false; |
161 |
static bool set = false; |
| 128 |
static int rc = SLURM_SUCCESS; |
162 |
static int rc = SLURM_SUCCESS; |
|
|
163 |
static char* llite_path; |
| 129 |
|
164 |
|
| 130 |
if (!set) { |
165 |
if (!set) { |
| 131 |
uint32_t profile = 0; |
166 |
uint32_t profile = 0; |
| 132 |
char lustre_directory[BUFSIZ]; |
|
|
| 133 |
DIR *proc_dir; |
| 134 |
|
167 |
|
| 135 |
set = true; |
168 |
set = true; |
| 136 |
acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, |
169 |
acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING, |
| 137 |
&profile); |
170 |
&profile); |
| 138 |
if ((profile & ACCT_GATHER_PROFILE_LUSTRE)) { |
171 |
if ((profile & ACCT_GATHER_PROFILE_LUSTRE)) { |
| 139 |
snprintf(lustre_directory, BUFSIZ, |
172 |
llite_path = _llite_path(); |
| 140 |
"%s/llite", proc_base_path); |
173 |
if (!llite_path) { |
| 141 |
proc_dir = opendir(proc_base_path); |
174 |
error("%s: can't find Lustre stats", __func__); |
| 142 |
if (!proc_dir) { |
175 |
rc = SLURM_ERROR; |
| 143 |
error("%s: not able to read %s %m", |
|
|
| 144 |
__func__, lustre_directory); |
| 145 |
rc = SLURM_FAILURE; |
| 146 |
} else { |
176 |
} else { |
| 147 |
closedir(proc_dir); |
177 |
debug("%s: using Lustre stats in %s", __func__, llite_path); |
|
|
178 |
rc = SLURM_SUCCESS; |
| 148 |
} |
179 |
} |
|
|
180 |
|
| 149 |
} else |
181 |
} else |
| 150 |
rc = SLURM_ERROR; |
182 |
rc = SLURM_ERROR; |
| 151 |
} |
183 |
} |
|
Lines 153-163
static int _check_lustre_fs(void)
Link Here
|
| 153 |
return rc; |
185 |
return rc; |
| 154 |
} |
186 |
} |
| 155 |
|
187 |
|
|
|
188 |
|
| 156 |
/* _read_lustre_counters() |
189 |
/* _read_lustre_counters() |
|
|
190 |
* |
| 157 |
* Read counters from all mounted lustre fs |
191 |
* Read counters from all mounted lustre fs |
| 158 |
* from the file stats under the directories: |
192 |
* from the file stats under the directories: |
| 159 |
* |
193 |
* |
| 160 |
* /proc/fs/lustre/llite/lustre-xxxx |
194 |
* /proc/fs/lustre/llite/lustre-xxxx |
|
|
195 |
* or |
| 196 |
* /sys/kernel/debug/lustre/llite/lustre-xxxx |
| 161 |
* |
197 |
* |
| 162 |
* From the file stat we use 2 entries: |
198 |
* From the file stat we use 2 entries: |
| 163 |
* |
199 |
* |
|
Lines 168-188
static int _check_lustre_fs(void)
Link Here
|
| 168 |
static int _read_lustre_counters(void) |
204 |
static int _read_lustre_counters(void) |
| 169 |
{ |
205 |
{ |
| 170 |
char lustre_dir[PATH_MAX]; |
206 |
char lustre_dir[PATH_MAX]; |
| 171 |
DIR *proc_dir; |
207 |
DIR *llite_dir; |
| 172 |
struct dirent *entry; |
208 |
struct dirent *entry; |
| 173 |
FILE *fff; |
209 |
FILE *fff; |
| 174 |
char buffer[BUFSIZ]; |
210 |
char buffer[BUFSIZ]; |
|
|
211 |
static char* llite_path; |
| 175 |
|
212 |
|
|
|
213 |
llite_path = _llite_path(); |
| 214 |
if (!llite_path) { |
| 215 |
error("%s: can't find Lustre stats", __func__); |
| 216 |
return SLURM_ERROR; |
| 217 |
} |
| 218 |
debug("%s: using Lustre stats in %s", __func__, llite_path); |
| 176 |
|
219 |
|
| 177 |
snprintf(lustre_dir, PATH_MAX, "%s/llite", proc_base_path); |
220 |
snprintf(lustre_dir, PATH_MAX, llite_path); |
| 178 |
|
221 |
|
| 179 |
proc_dir = opendir(lustre_dir); |
222 |
llite_dir = opendir(lustre_dir); |
| 180 |
if (proc_dir == NULL) { |
223 |
if (llite_dir == NULL) { |
| 181 |
error("%s: Cannot open %s %m", __func__, lustre_dir); |
224 |
error("%s: Cannot open %s %m", __func__, lustre_dir); |
| 182 |
return SLURM_FAILURE; |
225 |
return SLURM_ERROR; |
| 183 |
} |
226 |
} |
| 184 |
|
227 |
|
| 185 |
while ((entry = readdir(proc_dir))) { |
228 |
while ((entry = readdir(llite_dir))) { |
| 186 |
char *path_stats = NULL; |
229 |
char *path_stats = NULL; |
| 187 |
bool bread; |
230 |
bool bread; |
| 188 |
bool bwrote; |
231 |
bool bwrote; |
|
Lines 257-264
static int _read_lustre_counters(void)
Link Here
|
| 257 |
__func__, lustre_se.all_lustre_nb_writes, |
300 |
__func__, lustre_se.all_lustre_nb_writes, |
| 258 |
lustre_se.all_lustre_nb_reads); |
301 |
lustre_se.all_lustre_nb_reads); |
| 259 |
|
302 |
|
| 260 |
} /* while ((entry = readdir(proc_dir))) */ |
303 |
} /* while ((entry = readdir(llite_dir))) */ |
| 261 |
closedir(proc_dir); |
304 |
closedir(llite_dir); |
| 262 |
|
305 |
|
| 263 |
lustre_se.last_update_time = lustre_se.update_time; |
306 |
lustre_se.last_update_time = lustre_se.update_time; |
| 264 |
lustre_se.update_time = time(NULL); |
307 |
lustre_se.update_time = time(NULL); |
|
Lines 269-277
static int _read_lustre_counters(void)
Link Here
|
| 269 |
|
312 |
|
| 270 |
|
313 |
|
| 271 |
|
314 |
|
| 272 |
/* |
315 |
/* _update_node_filesystem() |
| 273 |
* _thread_update_node_energy calls _read_ipmi_values and updates all values |
316 |
* |
| 274 |
* for node consumption |
317 |
* acct_gather_filesystem_p_node_update calls _update_node_filesystem and |
|
|
318 |
* updates all values for node Lustre usage |
| 319 |
* |
| 275 |
*/ |
320 |
*/ |
| 276 |
static int _update_node_filesystem(void) |
321 |
static int _update_node_filesystem(void) |
| 277 |
{ |
322 |
{ |
|
Lines 306-312
static int _update_node_filesystem(void)
Link Here
|
| 306 |
if (_read_lustre_counters() != SLURM_SUCCESS) { |
351 |
if (_read_lustre_counters() != SLURM_SUCCESS) { |
| 307 |
error("%s: Cannot read lustre counters", __func__); |
352 |
error("%s: Cannot read lustre counters", __func__); |
| 308 |
slurm_mutex_unlock(&lustre_lock); |
353 |
slurm_mutex_unlock(&lustre_lock); |
| 309 |
return SLURM_FAILURE; |
354 |
return SLURM_ERROR; |
| 310 |
} |
355 |
} |
| 311 |
|
356 |
|
| 312 |
if (first) { |
357 |
if (first) { |
|
Lines 387-392
extern int init(void)
Link Here
|
| 387 |
{ |
432 |
{ |
| 388 |
slurmdb_tres_rec_t tres_rec; |
433 |
slurmdb_tres_rec_t tres_rec; |
| 389 |
|
434 |
|
|
|
435 |
if (debug_flags & DEBUG_FLAG_FILESYSTEM) |
| 436 |
info("lustre: loaded"); |
| 437 |
|
| 390 |
if (!_run_in_daemon()) |
438 |
if (!_run_in_daemon()) |
| 391 |
return SLURM_SUCCESS; |
439 |
return SLURM_SUCCESS; |
| 392 |
|
440 |
|
|
Lines 406-412
extern int fini(void)
Link Here
|
| 406 |
return SLURM_SUCCESS; |
454 |
return SLURM_SUCCESS; |
| 407 |
|
455 |
|
| 408 |
if (debug_flags & DEBUG_FLAG_FILESYSTEM) |
456 |
if (debug_flags & DEBUG_FLAG_FILESYSTEM) |
| 409 |
info("lustre: ended"); |
457 |
info("lustre: unloaded"); |
| 410 |
|
458 |
|
| 411 |
return SLURM_SUCCESS; |
459 |
return SLURM_SUCCESS; |
| 412 |
} |
460 |
} |
|
Lines 454-460
extern int acct_gather_filesystem_p_get_data(acct_gather_data_t *data)
Link Here
|
| 454 |
if (_read_lustre_counters() != SLURM_SUCCESS) { |
502 |
if (_read_lustre_counters() != SLURM_SUCCESS) { |
| 455 |
error("%s: Cannot read lustre counters", __func__); |
503 |
error("%s: Cannot read lustre counters", __func__); |
| 456 |
slurm_mutex_unlock(&lustre_lock); |
504 |
slurm_mutex_unlock(&lustre_lock); |
| 457 |
return SLURM_FAILURE; |
505 |
return SLURM_ERROR; |
| 458 |
} |
506 |
} |
| 459 |
|
507 |
|
| 460 |
/* Obtain the current values read from all lustre-xxxx directories */ |
508 |
/* Obtain the current values read from all lustre-xxxx directories */ |
| 461 |
- |
|
|