From bd9b4a3b8561a34c1e62361b79c0817174110084 Mon Sep 17 00:00:00 2001
From: Boris Karasev
Date: Tue, 24 Mar 2020 17:57:49 +0600
Subject: [PATCH] mpi/pmix: add support for the `PMIx_server_setup_application` API

Signed-off-by: Boris Karasev
---
 src/plugins/mpi/pmix/pmixp_client.c    | 132 ++++++++++++++++++++++++++++++++-
 src/plugins/mpi/pmix/pmixp_client_v2.c |   8 +++
 2 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/src/plugins/mpi/pmix/pmixp_client.c b/src/plugins/mpi/pmix/pmixp_client.c
index e56c784..bbbd4f5 100644
--- a/src/plugins/mpi/pmix/pmixp_client.c
+++ b/src/plugins/mpi/pmix/pmixp_client.c
@@ -2,7 +2,7 @@
  ** pmix_client.c - PMIx client communication code
  *****************************************************************************
  * Copyright (C) 2014-2015 Artem Polyakov. All rights reserved.
- * Copyright (C) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (C) 2015-2020 Mellanox Technologies. All rights reserved.
  * Written by Artem Polyakov .
  *
  * This file is part of Slurm, a resource management program.
@@ -120,6 +120,17 @@ typedef struct {
 	volatile int active;
 } register_caddy_t;
 
+#if (HAVE_PMIX_VER == 4)
+/*
+ * Completion state shared between _setup_application() and _setup_app_cb().
+ */
+typedef struct {
+	pmix_status_t rc;	/* status reported to the callback */
+	List lresp;		/* job info list the callback appends to */
+	volatile int active;	/* non-zero until the callback has finished */
+} setup_app_caddy_t;
+#endif
+
 static void _release_cb(pmix_status_t status, void *cbdata)
 {
 	slurm_mutex_lock(&_reg_mutex);
@@ -129,6 +140,44 @@ static void _release_cb(pmix_status_t status, void *cbdata)
 	slurm_mutex_unlock(&_reg_mutex);
 }
 
+#if (HAVE_PMIX_VER == 4)
+/*
+ * Completion callback for PMIx_server_setup_application().
+ *
+ * Copies every returned info entry into the caddy's list, records the status
+ * and only then clears caddy->active: the waiter in _setup_application()
+ * frees the caddy as soon as it observes active == 0, so nothing may touch
+ * the caddy after that store.
+ */
+static void _setup_app_cb(pmix_status_t status,
+			  pmix_info_t info[], size_t ninfo,
+			  void *provided_cbdata,
+			  pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+	setup_app_caddy_t *caddy = (setup_app_caddy_t *)provided_cbdata;
+	pmix_info_t *kvp;
+	size_t n;
+
+	xassert(NULL != caddy);
+	xassert(NULL != caddy->lresp);
+
+	for (n = 0; n < ninfo; n++) {
+		/* deep copy: works for pointer-valued types (strings) too */
+		kvp = xmalloc(sizeof(*kvp));
+		PMIX_INFO_XFER(kvp, &info[n]);
+		list_append(caddy->lresp, kvp);
+	}
+
+	/* info[] has been copied, hand it back to its provider */
+	if (NULL != cbfunc) {
+		cbfunc(status, cbdata);
+	}
+	caddy->rc = status;
+	/* must stay the last access to the caddy (see header comment) */
+	caddy->active = 0;
+}
+#endif
+
 /*
  * general proc-level attributes
  */
@@ -357,6 +406,80 @@ err_exit:
 	return;
 }
 
+#if (HAVE_PMIX_VER == 4)
+/*
+ * Provide application-specific setup data.
+ *
+ * Hands the PMIX_NODE_MAP and PMIX_PROC_MAP entries of lresp to
+ * PMIx_server_setup_application() and waits for the completion callback,
+ * which appends the info entries it receives to lresp.
+ *
+ * IN/OUT lresp - list of pmix_info_t entries describing the job
+ * RET SLURM_SUCCESS, or SLURM_ERROR if a map is missing or PMIx fails
+ */
+static int _setup_application(List lresp)
+{
+	/* number of entries handed over: the node map and the proc map */
+	enum { SETUP_APP_NINFO = 2 };
+	int rc = SLURM_SUCCESS;
+	pmix_status_t pmix_rc;
+	int ninfo = 0;
+	pmix_info_t *info;
+	ListIterator it;
+	pmix_info_t *kvp;
+	struct timespec ts;
+	setup_app_caddy_t *setup_app_caddy = xmalloc(sizeof(*setup_app_caddy));
+
+	PMIX_INFO_CREATE(info, SETUP_APP_NINFO);
+	it = list_iterator_create(lresp);
+	while ((ninfo < SETUP_APP_NINFO) && (kvp = list_next(it))) {
+		if ((0 == strncmp(kvp->key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN)) ||
+		    (0 == strncmp(kvp->key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN))) {
+			/* deep copy of the key and its value */
+			PMIX_INFO_XFER(&info[ninfo], kvp);
+			ninfo++;
+		}
+	}
+	list_iterator_destroy(it);
+
+	if (SETUP_APP_NINFO > ninfo) {
+		PMIXP_ERROR("failed setup application: not sufficient arguments");
+		rc = SLURM_ERROR;
+		goto exit;
+	}
+
+	setup_app_caddy->lresp = lresp;
+	setup_app_caddy->active = 1;
+	pmix_rc = PMIx_server_setup_application(pmixp_info_namespace(),
+						info, ninfo,
+						_setup_app_cb,
+						setup_app_caddy);
+	if (PMIX_SUCCESS != pmix_rc) {
+		PMIXP_ERROR("PMIx_server_setup_application failed with error: %d",
+			    pmix_rc);
+		rc = SLURM_ERROR;
+		goto exit;
+	}
+
+	/* poll until the callback clears the flag (its last action) */
+	ts.tv_sec = 0;
+	ts.tv_nsec = 10;
+	while (setup_app_caddy->active) {
+		nanosleep(&ts, NULL);
+	}
+
+	if (PMIX_SUCCESS != setup_app_caddy->rc) {
+		PMIXP_ERROR("PMIx_server_setup_application callback function failed with error: %d",
+			    setup_app_caddy->rc);
+		rc = SLURM_ERROR;
+	}
+exit:
+	PMIX_INFO_FREE(info, SETUP_APP_NINFO);
+	xfree(setup_app_caddy);
+	return rc;
+}
+#endif
+
 /*
  * Estimate the size of a buffer capable of holding the proc map for this job.
  * PMIx proc map string format:
@@ -671,6 +794,13 @@ extern int pmixp_libpmix_job_set(void)
 
 	_set_localinfo(lresp);
 
+#if (HAVE_PMIX_VER == 4)
+	if (SLURM_SUCCESS != _setup_application(lresp)) {
+		list_destroy(lresp);
+		return SLURM_ERROR;
+	}
+#endif
+
 	ninfo = list_count(lresp);
 	PMIX_INFO_CREATE(info, ninfo);
 	it = list_iterator_create(lresp);
diff --git a/src/plugins/mpi/pmix/pmixp_client_v2.c b/src/plugins/mpi/pmix/pmixp_client_v2.c
index ddabf01..6cb32fe 100644
--- a/src/plugins/mpi/pmix/pmixp_client_v2.c
+++ b/src/plugins/mpi/pmix/pmixp_client_v2.c
@@ -231,6 +231,9 @@ int pmixp_lib_init(void)
 	pmix_info_t *kvp = NULL;
 	pmix_status_t rc;
 	uint32_t jobuid = pmixp_info_jobuid();
+#ifdef PMIX_SERVER_SCHEDULER
+	bool flag = true;
+#endif
 
 	PMIXP_KVP_ADD(kvp, PMIX_USERID, &jobuid, PMIX_UINT32);
 
@@ -239,6 +242,11 @@ int pmixp_lib_init(void)
 		      pmixp_info_tmpdir_lib(), PMIX_STRING);
 #endif
 
+#ifdef PMIX_SERVER_SCHEDULER
+	PMIXP_KVP_ADD(kvp, PMIX_SERVER_SCHEDULER,
+		      &flag, PMIX_BOOL);
+#endif
+
 	/* setup the server library */
 	if (PMIX_SUCCESS != (rc = PMIx_server_init(&slurm_pmix_cb, kvp,
 						   PMIXP_INFO_SIZE(kvp)))) {
-- 
1.9.1