Lines 174-184
static pthread_cond_t launch_cond = PTHREAD_COND_INITIALIZER;
Link Here
|
174 |
pthread_t thread_ipmi_id_launcher = 0; |
174 |
pthread_t thread_ipmi_id_launcher = 0; |
175 |
pthread_t thread_ipmi_id_run = 0; |
175 |
pthread_t thread_ipmi_id_run = 0; |
176 |
|
176 |
|
177 |
/* |
|
|
178 |
* DCMI context cannot be reused between threads and this plugin can be called |
179 |
* from different slurmd threads, so we need the __thread specifier. |
180 |
*/ |
181 |
__thread ipmi_ctx_t ipmi_dcmi_ctx = NULL; |
182 |
static int dcmi_cnt = 0; |
177 |
static int dcmi_cnt = 0; |
183 |
|
178 |
|
184 |
static int _read_ipmi_dcmi_values(void); |
179 |
static int _read_ipmi_dcmi_values(void); |
Lines 212-231
static uint64_t _get_additional_consumption(time_t time0, time_t time1,
Link Here
|
212 |
/* |
207 |
/* |
213 |
* _open_dcmi_context opens the inband ipmi device for DCMI power reading |
208 |
* _open_dcmi_context opens the inband ipmi device for DCMI power reading |
214 |
*/ |
209 |
*/ |
215 |
static int _open_dcmi_context(void) |
210 |
static int _open_dcmi_context(ipmi_ctx_t *ipmi_dcmi_ctx) |
216 |
{ |
211 |
{ |
217 |
int ret; |
212 |
int ret; |
218 |
|
213 |
|
219 |
if (!dcmi_cnt || ipmi_dcmi_ctx) |
214 |
if (!dcmi_cnt || *ipmi_dcmi_ctx) |
220 |
return SLURM_SUCCESS; |
215 |
return SLURM_SUCCESS; |
221 |
|
216 |
|
222 |
ipmi_dcmi_ctx = ipmi_ctx_create(); |
217 |
*ipmi_dcmi_ctx = ipmi_ctx_create(); |
223 |
if (!ipmi_dcmi_ctx) { |
218 |
if (!*ipmi_dcmi_ctx) { |
224 |
error("Failed creating dcmi ipmi context"); |
219 |
error("Failed creating dcmi ipmi context"); |
225 |
return SLURM_ERROR; |
220 |
return SLURM_ERROR; |
226 |
} |
221 |
} |
227 |
|
222 |
|
228 |
ret = ipmi_ctx_find_inband(ipmi_dcmi_ctx, |
223 |
ret = ipmi_ctx_find_inband(*ipmi_dcmi_ctx, |
229 |
NULL, |
224 |
NULL, |
230 |
ipmi_config.disable_auto_probe, |
225 |
ipmi_config.disable_auto_probe, |
231 |
ipmi_config.driver_address, |
226 |
ipmi_config.driver_address, |
Lines 235-248
static int _open_dcmi_context(void)
Link Here
|
235 |
IPMI_FLAGS_DEFAULT); |
230 |
IPMI_FLAGS_DEFAULT); |
236 |
if (ret < 0) { |
231 |
if (ret < 0) { |
237 |
error("Error finding inband dcmi ipmi device: %s", |
232 |
error("Error finding inband dcmi ipmi device: %s", |
238 |
ipmi_ctx_errormsg(ipmi_dcmi_ctx)); |
233 |
ipmi_ctx_errormsg(*ipmi_dcmi_ctx)); |
239 |
ipmi_ctx_destroy(ipmi_dcmi_ctx); |
234 |
ipmi_ctx_destroy(*ipmi_dcmi_ctx); |
240 |
ipmi_dcmi_ctx = NULL; |
235 |
*ipmi_dcmi_ctx = NULL; |
241 |
return SLURM_ERROR; |
236 |
return SLURM_ERROR; |
242 |
} else if (!ret) { |
237 |
} else if (!ret) { |
243 |
error("No inband dcmi ipmi device found"); |
238 |
error("No inband dcmi ipmi device found"); |
244 |
ipmi_ctx_destroy(ipmi_dcmi_ctx); |
239 |
ipmi_ctx_destroy(*ipmi_dcmi_ctx); |
245 |
ipmi_dcmi_ctx = NULL; |
240 |
*ipmi_dcmi_ctx = NULL; |
246 |
return SLURM_ERROR; |
241 |
return SLURM_ERROR; |
247 |
} |
242 |
} |
248 |
|
243 |
|
Lines 261-266
static int _init_ipmi_config (void)
Link Here
|
261 |
* information. |
256 |
* information. |
262 |
*/ |
257 |
*/ |
263 |
unsigned int ipmimonitoring_init_flags = 0; |
258 |
unsigned int ipmimonitoring_init_flags = 0; |
|
|
259 |
ipmi_ctx_t ipmi_dcmi_ctx = NULL; |
264 |
memset(&ipmi_config, 0, sizeof(struct ipmi_monitoring_ipmi_config)); |
260 |
memset(&ipmi_config, 0, sizeof(struct ipmi_monitoring_ipmi_config)); |
265 |
ipmi_config.driver_type = (int) slurm_ipmi_conf.driver_type; |
261 |
ipmi_config.driver_type = (int) slurm_ipmi_conf.driver_type; |
266 |
ipmi_config.disable_auto_probe = |
262 |
ipmi_config.disable_auto_probe = |
Lines 341-348
static int _init_ipmi_config (void)
Link Here
|
341 |
/* sensor_reading_flags |= */ |
337 |
/* sensor_reading_flags |= */ |
342 |
/* IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; */ |
338 |
/* IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; */ |
343 |
|
339 |
|
344 |
if (_open_dcmi_context() != SLURM_SUCCESS) |
340 |
/* Fail fast if DCMI context can't be opened */ |
|
|
341 |
if (_open_dcmi_context(&ipmi_dcmi_ctx) != SLURM_SUCCESS) |
345 |
return SLURM_ERROR; |
342 |
return SLURM_ERROR; |
|
|
343 |
else if (ipmi_dcmi_ctx) { |
344 |
ipmi_ctx_close(ipmi_dcmi_ctx); |
345 |
ipmi_ctx_destroy(ipmi_dcmi_ctx); |
346 |
ipmi_dcmi_ctx = NULL; |
347 |
} |
346 |
|
348 |
|
347 |
return SLURM_SUCCESS; |
349 |
return SLURM_SUCCESS; |
348 |
} |
350 |
} |
Lines 478-487
static int _get_dcmi_power_reading(uint16_t dcmi_mode)
Link Here
|
478 |
fiid_obj_t dcmi_rs; |
480 |
fiid_obj_t dcmi_rs; |
479 |
int ret; |
481 |
int ret; |
480 |
|
482 |
|
481 |
if (!ipmi_dcmi_ctx) { |
483 |
ipmi_ctx_t ipmi_dcmi_ctx = NULL; |
482 |
error("%s: IPMI DCMI context not initialized", __func__); |
|
|
483 |
return SLURM_ERROR; |
484 |
} |
485 |
|
484 |
|
486 |
dcmi_rs = fiid_obj_create(tmpl_cmd_dcmi_get_power_reading_rs); |
485 |
dcmi_rs = fiid_obj_create(tmpl_cmd_dcmi_get_power_reading_rs); |
487 |
if (!dcmi_rs) { |
486 |
if (!dcmi_rs) { |
Lines 497-504
static int _get_dcmi_power_reading(uint16_t dcmi_mode)
Link Here
|
497 |
error("%s: DCMI mode %d not supported: ", __func__, dcmi_mode); |
496 |
error("%s: DCMI mode %d not supported: ", __func__, dcmi_mode); |
498 |
return SLURM_ERROR; |
497 |
return SLURM_ERROR; |
499 |
} |
498 |
} |
500 |
ret = ipmi_cmd_dcmi_get_power_reading(ipmi_dcmi_ctx, mode, |
499 |
|
|
|
500 |
if (_open_dcmi_context(&ipmi_dcmi_ctx) != SLURM_SUCCESS) { |
501 |
error("%s: Cannot open DCMI context", __func__); |
502 |
return SLURM_ERROR; |
503 |
} |
504 |
ret = ipmi_cmd_dcmi_get_power_reading(ipmi_dcmi_ctx, mode, |
501 |
mode_attributes, dcmi_rs); |
505 |
mode_attributes, dcmi_rs); |
|
|
506 |
ipmi_ctx_close(ipmi_dcmi_ctx); |
507 |
ipmi_ctx_destroy(ipmi_dcmi_ctx); |
508 |
ipmi_dcmi_ctx = NULL; |
502 |
if (ret < 0) { |
509 |
if (ret < 0) { |
503 |
error("%s: get DCMI power reading failed", __func__); |
510 |
error("%s: get DCMI power reading failed", __func__); |
504 |
fiid_obj_destroy(dcmi_rs); |
511 |
fiid_obj_destroy(dcmi_rs); |
Lines 746-756
static int _thread_init(void)
Link Here
|
746 |
int rc = SLURM_SUCCESS; |
753 |
int rc = SLURM_SUCCESS; |
747 |
uint16_t i; |
754 |
uint16_t i; |
748 |
|
755 |
|
749 |
if (!first && (_open_dcmi_context() != SLURM_SUCCESS)) { |
|
|
750 |
error("Cannot open dcmi context for this thread."); |
751 |
return SLURM_ERROR; |
752 |
} |
753 |
|
754 |
if (!first && ipmi_ctx) |
756 |
if (!first && ipmi_ctx) |
755 |
return first_init; |
757 |
return first_init; |
756 |
first = false; |
758 |
first = false; |
Lines 789-794
static int _thread_init(void)
Link Here
|
789 |
return rc; |
791 |
return rc; |
790 |
} |
792 |
} |
791 |
|
793 |
|
|
|
794 |
/* |
795 |
* _thread_cleanup handles closing of the DCMI context if necessary |
796 |
*/ |
797 |
static int _thread_cleanup(void) |
798 |
{ |
799 |
return SLURM_SUCCESS; |
800 |
} |
801 |
|
792 |
static int _ipmi_send_profile(void) |
802 |
static int _ipmi_send_profile(void) |
793 |
{ |
803 |
{ |
794 |
uint16_t i, j; |
804 |
uint16_t i, j; |
Lines 1091-1102
extern int fini(void)
Link Here
|
1091 |
ipmi_ctx = NULL; |
1101 |
ipmi_ctx = NULL; |
1092 |
} |
1102 |
} |
1093 |
|
1103 |
|
1094 |
if (ipmi_dcmi_ctx) { |
|
|
1095 |
ipmi_ctx_close(ipmi_dcmi_ctx); |
1096 |
ipmi_ctx_destroy(ipmi_dcmi_ctx); |
1097 |
ipmi_dcmi_ctx = NULL; |
1098 |
} |
1099 |
|
1100 |
reset_slurm_ipmi_conf(&slurm_ipmi_conf); |
1104 |
reset_slurm_ipmi_conf(&slurm_ipmi_conf); |
1101 |
|
1105 |
|
1102 |
slurm_mutex_unlock(&ipmi_mutex); |
1106 |
slurm_mutex_unlock(&ipmi_mutex); |