View | Details | Raw Unified | Return to ticket 3318 | Differences between
and this patch

Collapse All | Expand All

(-)a/configure.ac (+1 lines)
Lines 419-424 AC_CONFIG_FILES([Makefile Link Here
419
		 contribs/Makefile
419
		 contribs/Makefile
420
		 contribs/cray/Makefile
420
		 contribs/cray/Makefile
421
		 contribs/cray/csm/Makefile
421
		 contribs/cray/csm/Makefile
422
		 contribs/cray/slurmsmwd/Makefile
422
		 contribs/lua/Makefile
423
		 contribs/lua/Makefile
423
		 contribs/mic/Makefile
424
		 contribs/mic/Makefile
424
		 contribs/pam/Makefile
425
		 contribs/pam/Makefile
(-)a/contribs/cray/Makefile.am (-2 / +3 lines)
Lines 2-10 Link Here
2
# Makefile for cray tools
2
# Makefile for cray tools
3
#
3
#
4
4
5
SUBDIRS = csm
5
SUBDIRS = csm slurmsmwd
6
6
7
AUTOMAKE_OPTIONS = foreign
7
AUTOMAKE_OPTIONS = foreign
8
sbin_PROGRAMS =
8
9
9
if HAVE_NATIVE_CRAY
10
if HAVE_NATIVE_CRAY
10
sbin_SCRIPTS = slurmconfgen.py
11
sbin_SCRIPTS = slurmconfgen.py
Lines 18-24 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(JSON_CPPFLAGS) Link Here
18
19
19
if WITH_JSON_PARSER
20
if WITH_JSON_PARSER
20
convenience_libs = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
21
convenience_libs = $(top_builddir)/src/api/libslurm.o $(DL_LIBS)
21
sbin_PROGRAMS = capmc_suspend capmc_resume
22
sbin_PROGRAMS += capmc_suspend capmc_resume
22
capmc_suspend_SOURCES  = capmc_suspend.c
23
capmc_suspend_SOURCES  = capmc_suspend.c
23
capmc_suspend_LDADD    = $(convenience_libs)
24
capmc_suspend_LDADD    = $(convenience_libs)
24
capmc_suspend_LDFLAGS  = -export-dynamic $(JSON_LDFLAGS)
25
capmc_suspend_LDFLAGS  = -export-dynamic $(JSON_LDFLAGS)
(-)a/contribs/cray/slurmsmwd/Makefile.am (+35 lines)
Line 0 Link Here
1
#
# Makefile for the Cray SMW helper daemon (slurmsmwd)
#

AUTOMAKE_OPTIONS = foreign

AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/common $(JSON_CPPFLAGS)

sbin_PROGRAMS = slurmsmwd
# read_config.h is listed so that "make dist" ships it with the sources.
slurmsmwd_SOURCES = main.c read_config.c read_config.h
slurmsmwd_LDADD = \
	$(top_builddir)/src/common/libdaemonize.la \
	$(top_builddir)/src/api/libslurm.o $(DL_LIBS)
slurmsmwd_LDFLAGS = -export-dynamic $(CMD_LDFLAGS)

ETC_FILES = slurmsmwd.service

# The service template is hand-written input, so it must be distributed
# explicitly; automake only ships files it knows about.
EXTRA_DIST = slurmsmwd.service.in

CLEANFILES = $(ETC_FILES) slurmsmwd.service.tmp

# Substitute the configured installation directories into the template.
edit = sed \
        -e 's|@bindir[@]|$(bindir)|g' \
        -e 's|@libdir[@]|$(libdir)|g' \
        -e 's|@sbindir[@]|$(sbindir)|g' \
        -e 's|@sysconfdir[@]|$(sysconfdir)|g' \
        -e 's|@BLUEGENE_LOADED_FALSE[@]|$(BLUEGENE_LOADED_FALSE)|g'

noinst_DATA = $(ETC_FILES)

# Write to a temporary file first so that a failed or interrupted sed never
# leaves a truncated, newer-looking slurmsmwd.service behind.
slurmsmwd.service: Makefile $(srcdir)/slurmsmwd.service.in
	$(edit) $(srcdir)/slurmsmwd.service.in > $@.tmp && mv -f $@.tmp $@

# "force" is a command-style target; declaring it phony keeps a stray file
# named "force" from disabling the rebuild of the link dependencies below.
.PHONY: force
force:
$(slurmsmwd_LDADD) : force
	@cd `dirname $@` && $(MAKE) `basename $@`
(-)a/contribs/cray/slurmsmwd/main.c (+588 lines)
Line 0 Link Here
1
/*****************************************************************************\
2
 *  main.c - Primary logic for slurmsmwd
3
 *****************************************************************************
4
 *  Copyright (C) 2017 Regents of the University of California
5
 *  Written by Douglas Jacobsen <dmjacobsen@lbl.gov>
6
 *
7
 *  This file is part of SLURM, a resource management program.
8
 *  For details, see <https://slurm.schedmd.com>.
9
 *  Please also read the included file: DISCLAIMER.
10
 *
11
 *  SLURM is free software; you can redistribute it and/or modify it under
12
 *  the terms of the GNU General Public License as published by the Free
13
 *  Software Foundation; either version 2 of the License, or (at your option)
14
 *  any later version.
15
 *
16
 *  In addition, as a special exception, the copyright holders give permission
17
 *  to link the code of portions of this program with the OpenSSL library under
18
 *  certain conditions as described in each individual source file, and
19
 *  distribute linked combinations including the two. You must obey the GNU
20
 *  General Public License in all respects for all of the code used other than
21
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
22
 *  exception to your version of the file(s), but you are not obligated to do
23
 *  so. If you do not wish to do so, delete this exception statement from your
24
 *  version.  If you delete this exception statement from all source files in
25
 *  the program, then also delete it here.
26
 *
27
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
28
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
30
 *  details.
31
 *
32
 *  You should have received a copy of the GNU General Public License along
33
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
34
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
35
\*****************************************************************************/
36
37
#define _GNU_SOURCE
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <errno.h>
42
#include <unistd.h>
43
#include <stdarg.h>
44
#include <pthread.h>
45
#include <ctype.h>
46
#include <signal.h>
47
#include <sys/time.h>
48
#include <poll.h>
49
50
#include "src/common/slurm_xlator.h"
51
#include "slurm/slurm.h"
52
#include "src/common/macros.h"
53
#include "src/common/xmalloc.h"
54
#include "src/common/xstring.h"
55
#include "src/common/slurm_protocol_defs.h"
56
#include "src/common/daemonize.h"
57
#include "src/common/xsignal.h"
58
#include "src/common/log.h"
59
#include "src/common/proc_args.h"
60
61
#include "read_config.h"
62
63
#define MAX_POLL_WAIT 500
64
65
/* Local variables */
66
static int foreground = 0;
67
static log_options_t log_opts = 	/* Log to stderr & syslog */
68
	LOG_OPTS_INITIALIZER;
69
static int _sigarray[] = {	/* blocked signals for this process */
70
	SIGINT,  SIGTERM, SIGCHLD, SIGUSR1,
71
	SIGUSR2, SIGTSTP, SIGXCPU, SIGQUIT,
72
	SIGPIPE, SIGALRM, SIGABRT, SIGHUP, 0 };
73
static const char *xtconsumer_path = "/opt/cray/hss/default/bin/xtconsumer";
74
static int slurmsmwd_log_fmt = LOG_FMT_ISO8601_MS;
75
static pthread_t xtc_thread;
76
static pid_t xtc_pid = 0;
77
78
static int stop_running = 0;
79
static uint16_t debug_level = 0;
80
static pthread_mutex_t down_node_lock;
81
static int *down_node;
82
static size_t n_down_node;
83
static size_t down_node_sz;
84
static const char *event_description[] = {
85
	"Invalid Event",
86
	"ec_node_failed",
87
	"ec_node_unavailable"
88
};
89
90
typedef enum event_type {
91
	EVENT_INVALID = 0,
92
	EVENT_NODE_FAILED,
93
	EVENT_NODE_UNAVAILABLE,
94
	EVENT_LIMIT
95
} event_type_t;
96
97
static int _start_xtconsumer(char **xtc_argv, pid_t *pid);
98
99
void shutdown_threads(void) {
100
	stop_running = 1;
101
}
102
103
/*
 * getnid - convert a Cray component name into a numeric node id.
 *
 * cname: component name of the form "c<cabinet>-<row>c<chassis>s<slot>n<node>"
 * dim:   number of cabinets per row
 *
 * Returns the computed node id, or 0 when cname is NULL or does not match
 * the expected format.  (The caller in this file skips a result of 0.)
 */
int getnid(const char *cname, int dim) {
	int cabinet = 0, row = 0, chassis = 0, slot = 0, node = 0;
	const int nodes_per_slot = 4;
	const int nodes_per_chassis = nodes_per_slot * 16;
	const int nodes_per_cabinet = nodes_per_chassis * 3;
	const int nodes_per_row = nodes_per_cabinet * dim;

	if (cname == NULL)
		return 0;

	/* All five fields must convert; otherwise the inputs to the formula
	 * below would be meaningless. */
	if (sscanf(cname, "c%d-%dc%ds%dn%d",
		   &cabinet, &row, &chassis, &slot, &node) != 5)
		return 0;

	return cabinet * nodes_per_cabinet + row * nodes_per_row +
		chassis * nodes_per_chassis + slot * nodes_per_slot + node;
}
114
115
char *getnidlist() {
116
	char *ret = NULL;
117
	size_t idx = 0;
118
	int last_nid = 0;
119
	int in_range = 0;
120
	ret = xstrdup("nid[");
121
	for (idx = 0; idx < n_down_node; idx++) {
122
		int curr_nid = down_node[idx];
123
		if (last_nid == 0) {
124
			xstrfmtcat(ret, "%05d", curr_nid);
125
		} else if (curr_nid == last_nid) {
126
			continue;
127
		} else if (curr_nid - last_nid > 1) {
128
			if (in_range) {
129
				xstrfmtcat(ret, "-%05d", last_nid);
130
			}
131
			xstrfmtcat(ret, ",%05d", curr_nid);
132
			in_range = 0;
133
		} else if (idx == n_down_node - 1) {
134
			xstrfmtcat(ret, "-%05d", curr_nid);
135
		} else {
136
			in_range = 1;
137
		}
138
		last_nid = curr_nid;
139
	}
140
	xstrfmtcat(ret, "]");
141
	return ret;
142
}
143
144
int _mark_nodes_down() {
145
	/* locks are assumed to be held */
146
	int rc = 0;
147
	update_node_msg_t *update_msg = xmalloc(sizeof(update_node_msg_t));
148
149
	slurm_init_update_node_msg(update_msg);
150
151
	update_msg->node_names = getnidlist();
152
	update_msg->node_state = NODE_STATE_NO_RESPOND;
153
154
	info("setting %s to NotResponding", update_msg->node_names);
155
156
	rc = slurm_update_node(update_msg);
157
	if (rc != SLURM_SUCCESS)
158
		error("failed to set %s to NotResponding: %m", update_msg->node_names);
159
160
	slurm_free_update_node_msg(update_msg);
161
	return rc;
162
163
}
164
165
void *process_data(void *arg) {
166
	while (!stop_running) {
167
		slurm_mutex_lock(&down_node_lock);
168
		if (n_down_node > 0) {
169
			slurm_info("down node cnt: %lu", n_down_node);
170
			_mark_nodes_down();
171
			n_down_node = 0;
172
		}
173
		slurm_mutex_unlock(&down_node_lock);
174
		usleep(2000000);
175
176
	}
177
	return NULL;
178
}
179
180
event_type_t _parse_event(const char *input) {
181
	if (strstr(input, "ec_node_failed") != NULL)
182
		return EVENT_NODE_FAILED;
183
	if (strstr(input, "ec_node_unavailable") != NULL)
184
		return EVENT_NODE_UNAVAILABLE;
185
	return EVENT_INVALID;
186
}
187
188
/*
 * _cmp_nid - qsort_r() comparator ordering node ids ascending.
 *
 * a, b: pointers to the two int values being compared
 * arg:  unused
 *
 * Returns a negative, zero or positive value.  The result is derived from
 * comparisons rather than a subtraction, which could overflow for operands
 * of opposite sign.
 */
int _cmp_nid(const void *a, const void *b, void *arg) {
	int ai = * (const int *) a;
	int bi = * (const int *) b;

	(void) arg;
	return (ai > bi) - (ai < bi);
}
193
194
/*
 * _trim - strip leading and trailing whitespace from a string in place.
 *
 * str: NUL-terminated, writable string (may be NULL)
 *
 * Returns a pointer into str at the first non-whitespace character, with
 * trailing whitespace overwritten by NUL bytes; NULL if str is NULL.  For an
 * all-whitespace string the result points at the terminating NUL.
 */
char *_trim(char *str) {
	char *start = str;
	size_t len = 0;

	if (str == NULL)
		return NULL;

	/* isspace() is only defined for unsigned char values and EOF, so the
	 * (possibly signed) char must be converted before the call. */
	while (*start != '\0' && isspace((unsigned char) *start))
		start++;

	len = strlen(start);
	while (len > 0 && isspace((unsigned char) start[len - 1]))
		start[--len] = '\0';

	return start;
}
208
209
void _send_failed_nodes(char *nodelist) {
210
	char *search = nodelist;
211
	char *svptr = NULL;
212
	char *ptr = NULL;
213
	int nid = 0;
214
	slurm_mutex_lock(&down_node_lock);
215
	while ((ptr = strtok_r(search, " ", &svptr)) != NULL) {
216
		search = NULL;
217
		while (*ptr == ':')
218
			ptr++;
219
		ptr = _trim(ptr);
220
		if (strlen(ptr) == 0)
221
			continue;
222
		nid = getnid(ptr, slurmsmwd_cabinets_per_row);
223
		if (nid == 0)
224
			continue;
225
		if (n_down_node + 1 >= down_node_sz) {
226
			size_t alloc_quantity = (n_down_node + 1) * 2;
227
			size_t alloc_size = sizeof(int) * alloc_quantity;
228
			down_node = xrealloc(down_node, alloc_size);
229
			down_node_sz = alloc_quantity;
230
		}
231
		down_node[n_down_node++] = nid;
232
	}
233
	qsort_r(down_node, n_down_node, sizeof(int), _cmp_nid, NULL);
234
	slurm_mutex_unlock(&down_node_lock);
235
}
236
237
/*
238
2017-05-16 07:17:12|2017-05-16 07:17:12|0x40008063 - ec_node_failed|src=:1:s0|::c4-2c0s2n0 ::c4-2c0s2n2 ::c4-2c0s2n3
239
2017-05-16 07:17:12|2017-05-16 07:17:12|0x400020e8 - ec_node_unavailable|src=:1:s0|::c4-2c0s2n2
240
2017-05-16 08:11:01|2017-05-16 08:11:01|0x400020e8 - ec_node_unavailable|src=:1:s0|::c4-2c0s2n0 ::c4-2c0s2n1 ::c4-2c0s2n2 ::c4-2c0s2n3
241
*/
242
void *xtconsumer_listen(void *arg) {
243
	int xtc_fd = 0;
244
	char *xtc_argv[] = {
245
		"xtconsumer",
246
		"-b",
247
		"ec_node_unavailable",
248
		"ec_node_failed"
249
	};
250
	char *line_ptr = NULL;
251
	char *buffer = NULL;
252
	size_t buffer_sz = 0;
253
	size_t buffer_off = 0;
254
	struct pollfd fds;
255
	int i = 0;
256
	int status = 0;
257
258
	xtc_fd = _start_xtconsumer(xtc_argv, &xtc_pid);
259
	debug2("got xtc_pid: %d", xtc_pid);
260
261
	if (xtc_fd < 0) {
262
		error("failed to open xtconsumer: %s", slurm_strerror(slurm_get_errno()));
263
		return NULL;
264
	}
265
266
	/* xtconsumer seems to flush out its stdout on newline (typical)
267
	 * so reading line-by-line seems to be functional for this need
268
	 */
269
	buffer_sz = 1024;
270
	buffer = xmalloc(buffer_sz);
271
	while (!stop_running) {
272
273
		fds.fd = xtc_fd;
274
		fds.events = POLLIN | POLLHUP | POLLRDHUP;
275
		fds.revents = 0;
276
277
		i = poll(&fds, 1, MAX_POLL_WAIT);
278
		if (i == 0) {
279
			continue;
280
		} else if (i < 0) {
281
			error("poll(): %s", slurm_strerror(slurm_get_errno()));
282
			break;
283
		}
284
		if ((fds.revents & POLLIN) == 0)
285
			break;
286
		i = read(xtc_fd, buffer + buffer_off,
287
				 buffer_sz - buffer_off);
288
289
		debug3("read %d bytes", i);
290
		if (i == 0) {
291
			break;
292
		} else if (i < 0) {
293
			if (errno == EAGAIN)
294
				continue;
295
			error("read(): %s", slurm_strerror(slurm_get_errno()));
296
			break;
297
		}
298
		buffer_off += i;
299
		if (buffer_off + 1024 >= buffer_sz) {
300
			buffer_sz *= 2;
301
			buffer = xrealloc(buffer, buffer_sz);
302
		}
303
304
		/* NUL terminate the string to allow strchr to work
305
		 * buffer was expanded above to ensure there would be space
306
		 */
307
		buffer[buffer_off + 1] = '\0';
308
		while ((line_ptr = strchr(buffer, '\n')) != NULL) {
309
			event_type_t event = EVENT_INVALID;
310
			char *node_list = NULL;
311
			char *search = NULL;
312
			char *ptr = NULL;
313
			char *svptr = NULL;
314
			int token_idx = 0;
315
			*line_ptr = '\0';
316
			if (strlen(buffer) == 0)
317
				goto advance_line;
318
			debug3("got line: %s", buffer);
319
			search = buffer;
320
			while ((ptr = strtok_r(search, "|", &svptr)) != NULL) {
321
				search = NULL;
322
				if (token_idx == 2)
323
					event = _parse_event(ptr);
324
				if (token_idx == 4)
325
					node_list = xstrdup(ptr);
326
327
				token_idx++;
328
			}
329
330
			if (event == EVENT_NODE_FAILED ||
331
			    event == EVENT_NODE_UNAVAILABLE) {
332
				info("received event: %s, nodelist: %s",
333
				     event_description[event], node_list);
334
				_send_failed_nodes(node_list);
335
			}
336
337
			xfree(node_list);
338
			node_list = NULL;
339
340
advance_line:
341
			*line_ptr = '\n';
342
			line_ptr++;
343
			for (ptr = buffer; *line_ptr; ptr++, line_ptr++)
344
				*ptr = *line_ptr;
345
			*ptr = *line_ptr;
346
			buffer_off = ptr - buffer;
347
		}
348
349
350
351
	}
352
	info("killing xtconsumer pid %d", xtc_pid);
353
	killpg(xtc_pid, SIGTERM);
354
	usleep(10000);
355
	killpg(xtc_pid, SIGKILL);
356
	waitpid(xtc_pid, &status, 0);
357
	close(xtc_fd);
358
359
360
#if 0
361
cleanup_break:
362
		if (node_list)
363
			xfree(node_list);
364
		break;
365
#endif
366
	xfree(buffer);
367
	return NULL;
368
369
}
370
371
/*
 * _usage - write a summary of the command line options to stderr.
 * prog_name: name shown in the "Usage:" line
 */
static void _usage(char *prog_name)
{
	fprintf(stderr,
		"Usage: %s [OPTIONS]\n"
		"  -D         \t" "Run daemon in foreground.\n"
		"  -h         \t" "Print this help message.\n"
		"  -v         \t" "Verbose mode. Multiple -v's increase verbosity.\n"
		"  -V         \t" "Print version information and exit.\n",
		prog_name);
}
384
385
static void _parse_commandline(int argc, char **argv)
386
{
387
	int c = 0;
388
389
	opterr = 0;
390
	while ((c = getopt(argc, argv, "DhvV")) != -1)
391
		switch (c) {
392
		case 'D':
393
			foreground = 1;
394
			break;
395
		case 'h':
396
			_usage(argv[0]);
397
			exit(0);
398
			break;
399
		case 'v':
400
			debug_level++;
401
			break;
402
		case 'V':
403
			print_slurm_version();
404
			exit(0);
405
			break;
406
		default:
407
			_usage(argv[0]);
408
			exit(1);
409
		}
410
}
411
412
static void _update_logging(void)
413
{
414
415
        /* Preserve execute line arguments (if any) */
416
        if (debug_level) {
417
                slurmsmwd_debug_level = MIN(
418
                        (LOG_LEVEL_INFO + debug_level),
419
                        (LOG_LEVEL_END - 1));
420
        }
421
422
        log_opts.stderr_level  = slurmsmwd_debug_level;
423
        log_opts.logfile_level = slurmsmwd_debug_level;
424
        log_opts.syslog_level  = slurmsmwd_debug_level;
425
426
        if (foreground)
427
                log_opts.syslog_level = LOG_LEVEL_QUIET;
428
        else {
429
                log_opts.stderr_level = LOG_LEVEL_QUIET;
430
                if (slurmsmwd_log_file)
431
                        log_opts.syslog_level = LOG_LEVEL_QUIET;
432
        }
433
434
        log_alter(log_opts, SYSLOG_FACILITY_DAEMON, slurmsmwd_log_file);
435
        log_set_timefmt(slurmsmwd_log_fmt);
436
}
437
438
/*
 * reconfig - re-read the configuration file, then re-apply the logging
 * settings derived from it.
 */
extern void reconfig(void)
{
	slurmsmwd_read_config();	/* refresh the configuration globals */
	_update_logging();		/* make the new log settings effective */
}
443
444
/* Reset some signals to their default state to clear any
445
 * inherited signal states */
446
static void _default_sigaction(int sig)
447
{
448
        struct sigaction act;
449
450
        if (sigaction(sig, NULL, &act)) {
451
                error("sigaction(%d): %m", sig);
452
                return;
453
        }
454
        if (act.sa_handler != SIG_IGN)
455
                return;
456
457
        act.sa_handler = SIG_DFL;
458
        if (sigaction(sig, &act, NULL))
459
                error("sigaction(%d): %m", sig);
460
}
461
462
/* _signal_handler - Process daemon-wide signals */
463
static void *_signal_handler(void *no_data)
464
{
465
	int rc, sig;
466
	int sig_array[] = {SIGINT, SIGTERM, SIGHUP, SIGABRT, 0};
467
	sigset_t set;
468
469
	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
470
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
471
472
	/* Make sure no required signals are ignored (possibly inherited) */
473
	_default_sigaction(SIGINT);
474
	_default_sigaction(SIGTERM);
475
	_default_sigaction(SIGHUP);
476
	_default_sigaction(SIGABRT);
477
478
	while (1) {
479
		xsignal_sigset_create(sig_array, &set);
480
		rc = sigwait(&set, &sig);
481
		if (rc == EINTR)
482
			continue;
483
		switch (sig) {
484
		case SIGHUP:	/* kill -1 */
485
			info("Reconfigure signal (SIGHUP) received");
486
			reconfig();
487
			break;
488
		case SIGINT:	/* kill -2  or <CTRL-C> */
489
		case SIGTERM:	/* kill -15 */
490
			info("Terminate signal (SIGINT or SIGTERM) received");
491
			shutdown_threads();
492
			return NULL;	/* Normal termination */
493
		case SIGABRT:	/* abort */
494
			info("SIGABRT received");
495
			abort();	/* Should terminate here */
496
			shutdown_threads();
497
			return NULL;
498
		default:
499
			error("Invalid signal (%d) received", sig);
500
		}
501
	}
502
503
}
504
505
int main(int argc, char **argv) {
506
	pthread_t processing_thread, signal_handler_thread;
507
	pthread_attr_t thread_attr;
508
509
	_parse_commandline(argc, argv);
510
511
	log_init(argv[0], log_opts, LOG_DAEMON, NULL);
512
	reconfig();
513
	slurmsmwd_print_config();
514
515
	if (!foreground) {
516
		daemon(0, 0);
517
	}
518
	if (create_pidfile("/var/run/slurmsmwd.pid", 0) < 0)
519
		fatal("Unable to create pidfile /var/run/slurmswmd.pid");
520
	
521
	slurm_mutex_init(&down_node_lock);
522
523
        /* Create attached thread for signal handling */
524
	if (xsignal_block(_sigarray) < 0)
525
		error("Unable to block signals");
526
        slurm_attr_init(&thread_attr);
527
        if (pthread_create(&signal_handler_thread, &thread_attr,
528
			   _signal_handler, NULL))
529
                fatal("pthread_create %m");
530
        slurm_attr_destroy(&thread_attr);
531
532
	slurm_attr_init(&thread_attr);
533
	if (pthread_create(&processing_thread, &thread_attr,
534
			   &process_data, NULL))
535
		fatal("pthread_create %m");
536
	slurm_attr_destroy(&thread_attr);
537
538
	while (!stop_running) {
539
		slurm_attr_init(&thread_attr);
540
		if (pthread_create(&xtc_thread, &thread_attr,
541
				   &xtconsumer_listen, NULL))
542
			fatal("pthread_create %m");
543
		slurm_attr_destroy(&thread_attr);
544
		pthread_join(xtc_thread, NULL);
545
	}
546
547
	pthread_join(processing_thread, NULL);
548
	slurm_mutex_destroy(&down_node_lock);
549
	return 0;
550
}
551
	
552
static int _start_xtconsumer(char **xtc_argv, pid_t *pid)
553
{
554
	int cc, i;
555
	pid_t cpid;
556
	int pfd[2] = { -1, -1 };
557
558
	if (access(xtconsumer_path, R_OK | X_OK) < 0) {
559
		error("Can not execute: %s", xtconsumer_path);
560
		return -1;
561
	}
562
	if (pipe(pfd) != 0) {
563
		error("pipe(): %s", slurm_strerror(slurm_get_errno()));
564
		return -1;
565
	}
566
567
	if ((cpid = fork()) == 0) {
568
		cc = sysconf(_SC_OPEN_MAX);
569
		dup2(pfd[1], STDERR_FILENO);
570
		dup2(pfd[1], STDOUT_FILENO);
571
		for (i = 0; i < cc; i++) {
572
			if ((i != STDERR_FILENO) && (i != STDOUT_FILENO))
573
				close(i);
574
		}
575
		setpgid(0, 0);
576
		execv(xtconsumer_path, xtc_argv);
577
		error("execv(): %s", slurm_strerror(slurm_get_errno()));
578
		exit(127);
579
	} else if (cpid < 0) {
580
		close(pfd[0]);
581
		close(pfd[1]);
582
		error("fork(): %s", slurm_strerror(slurm_get_errno()));
583
		return -1;
584
	}
585
	*pid = cpid;
586
	close(pfd[1]);
587
	return pfd[0];
588
}
(-)a/contribs/cray/slurmsmwd/read_config.c (+102 lines)
Line 0 Link Here
1
/*****************************************************************************\
2
 *  read_config.c - Read configuration file for slurmsmwd
3
 *****************************************************************************
4
 *  Copyright (C) 2017 Regents of the University of California
5
 *  Written by Douglas Jacobsen <dmjacobsen@lbl.gov>
6
 *
7
 *  This file is part of SLURM, a resource management program.
8
 *  For details, see <https://slurm.schedmd.com>.
9
 *  Please also read the included file: DISCLAIMER.
10
 *
11
 *  SLURM is free software; you can redistribute it and/or modify it under
12
 *  the terms of the GNU General Public License as published by the Free
13
 *  Software Foundation; either version 2 of the License, or (at your option)
14
 *  any later version.
15
 *
16
 *  In addition, as a special exception, the copyright holders give permission
17
 *  to link the code of portions of this program with the OpenSSL library under
18
 *  certain conditions as described in each individual source file, and
19
 *  distribute linked combinations including the two. You must obey the GNU
20
 *  General Public License in all respects for all of the code used other than
21
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
22
 *  exception to your version of the file(s), but you are not obligated to do
23
 *  so. If you do not wish to do so, delete this exception statement from your
24
 *  version.  If you delete this exception statement from all source files in
25
 *  the program, then also delete it here.
26
 *
27
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
28
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
30
 *  details.
31
 *
32
 *  You should have received a copy of the GNU General Public License along
33
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
34
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
35
\*****************************************************************************/
36
37
#include <stdlib.h>
38
#include <string.h>
39
#include <sys/stat.h>
40
#include <sys/types.h>
41
#include <unistd.h>
42
43
#include "src/common/slurm_xlator.h"	/* Must be first */
44
#include "src/common/parse_config.h"
45
#include "read_config.h"
46
47
/* Global variables */
48
uint16_t slurmsmwd_cabinets_per_row = 0;
49
uint16_t slurmsmwd_debug_level = LOG_LEVEL_INFO;
50
char *slurmsmwd_log_file = NULL;
51
52
static s_p_options_t slurmsmwd_options[] = {
53
	{"CabinetsPerRow", S_P_UINT16},
54
	{"DebugLevel", S_P_STRING},
55
	{"LogFile", S_P_STRING},
56
	{NULL}
57
};
58
59
extern void slurmsmwd_print_config(void)
60
{
61
	debug2("slurmsmwd configuration");
62
	debug2("CabinetsPerRow = %u", slurmsmwd_cabinets_per_row);
63
	debug2("DebugLevel     = %u", slurmsmwd_debug_level);
64
	debug2("LogFile        = %s", slurmsmwd_log_file);
65
}
66
67
static void _validate_config(void)
68
{
69
	if (slurmsmwd_cabinets_per_row == 0)
70
		fatal("slurmsmwd.conf: CabinetsPerRow must not be zero");
71
}
72
73
/* Load configuration file contents into global variables.
74
 * Call slurmsmwd_free_config to free memory. */
75
extern void slurmsmwd_read_config(void)
76
{
77
	char *config_file = NULL;
78
	char *temp_str = NULL;
79
	s_p_hashtbl_t *tbl = NULL;
80
	struct stat config_stat;
81
82
	config_file = get_extra_conf_path("slurmsmwd.conf");
83
	if (stat(config_file, &config_stat) < 0)
84
		fatal("Can't stat slurmsmwd.conf %s: %m", config_file);
85
	tbl = s_p_hashtbl_create(slurmsmwd_options);
86
	if (s_p_parse_file(tbl, NULL, config_file, false) == SLURM_ERROR)
87
		fatal("Can't parse slurmsmwd.conf %s: %m", config_file);
88
89
	s_p_get_uint16(&slurmsmwd_cabinets_per_row, "CabinetsPerRow", tbl);
90
	s_p_get_string(&slurmsmwd_log_file, "LogFile", tbl);
91
	if (s_p_get_string(&temp_str, "DebugLevel", tbl)) {
92
		slurmsmwd_debug_level = log_string2num(temp_str);
93
		if (slurmsmwd_debug_level == (uint16_t) NO_VAL)
94
			fatal("Invalid DebugLevel %s", temp_str);
95
		xfree(temp_str);
96
	}
97
98
	_validate_config();
99
100
	s_p_hashtbl_destroy(tbl);
101
	xfree(config_file);
102
}
(-)a/contribs/cray/slurmsmwd/read_config.h (+55 lines)
Line 0 Link Here
1
/*****************************************************************************\
2
 *  read_config.h - Define symbols used to read configuration file for
3
 *  slurmsmwd
4
 *****************************************************************************
5
 *  Copyright (C) 2017 Regents of the University of California
6
 *  Written by Douglas Jacobsen <dmjacobsen@lbl.gov>
7
 *
8
 *  This file is part of SLURM, a resource management program.
9
 *  For details, see <https://slurm.schedmd.com>.
10
 *  Please also read the included file: DISCLAIMER.
11
 *
12
 *  SLURM is free software; you can redistribute it and/or modify it under
13
 *  the terms of the GNU General Public License as published by the Free
14
 *  Software Foundation; either version 2 of the License, or (at your option)
15
 *  any later version.
16
 *
17
 *  In addition, as a special exception, the copyright holders give permission
18
 *  to link the code of portions of this program with the OpenSSL library under
19
 *  certain conditions as described in each individual source file, and
20
 *  distribute linked combinations including the two. You must obey the GNU
21
 *  General Public License in all respects for all of the code used other than
22
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
23
 *  exception to your version of the file(s), but you are not obligated to do
24
 *  so. If you do not wish to do so, delete this exception statement from your
25
 *  version.  If you delete this exception statement from all source files in
26
 *  the program, then also delete it here.
27
 *
28
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
29
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
31
 *  details.
32
 *
33
 *  You should have received a copy of the GNU General Public License along
34
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
35
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
36
\*****************************************************************************/
37
38
#ifndef _HAVE_SLURMSMWD_READ_CONFIG_H
39
#define _HAVE_SLURMSMWD_READ_CONFIG_H
40
41
#include <inttypes.h>
42
#include <sys/types.h>
43
#include <unistd.h>
44
45
extern uint16_t slurmsmwd_cabinets_per_row;
46
extern uint16_t slurmsmwd_debug_level;
47
extern char *   slurmsmwd_log_file;
48
49
/* Configuration functions */
50
51
/* Load configuration file contents into global variables. */
52
extern void slurmsmwd_read_config(void);
53
extern void slurmsmwd_print_config(void);
54
55
#endif	/* _HAVE_SLURMSMWD_READ_CONFIG_H */
(-)a/contribs/cray/slurmsmwd/slurmsmwd.service.in (+13 lines)
Line 0 Link Here
1
[Unit]
2
Description=Cray SMW xtconsumer Slurm Helper daemon
3
After=network.target munge.service
4
ConditionPathExists=@sysconfdir@/slurmsmwd.conf
5
6
[Service]
7
Type=forking
8
ExecStart=@sbindir@/slurmsmwd
9
ExecReload=/bin/kill -HUP $MAINPID
10
PIDFile=/var/run/slurmsmwd.pid
11
12
[Install]
13
WantedBy=multi-user.target

Return to ticket 3318