View | Details | Raw Unified | Return to ticket 12004 | Differences between
and this patch

Collapse All | Expand All

(-)a/doc/man/man5/slurm.conf.5 (-2 / +2 lines)
Lines 1060-1067 Note that the \fBHealthCheckProgram\fR will be executed at the same time Link Here
1060
on all nodes to minimize its impact upon parallel programs.
1060
on all nodes to minimize its impact upon parallel programs.
1061
This program will be killed if it does not terminate normally within
1061
This program will be killed if it does not terminate normally within
1062
60 seconds.
1062
60 seconds.
1063
This program will also be executed when the slurmd daemon is first started and
1063
This program will also be executed when the slurmd daemon is first started
1064
before it registers with the slurmctld daemon.
1064
(with a 20 minute timeout) before it registers with the slurmctld daemon.
1065
By default, no program will be executed.
1065
By default, no program will be executed.
1066
1066
1067
.TP
1067
.TP
(-)a/src/slurmd/slurmd/req.c (-1 / +1 lines)
Lines 3042-3048 static void _rpc_health_check(slurm_msg_t *msg) Link Here
3042
	}
3042
	}
3043
3043
3044
	if (rc == SLURM_SUCCESS)
3044
	if (rc == SLURM_SUCCESS)
3045
		rc = run_script_health_check();
3045
		rc = run_script_health_check(false);
3046
3046
3047
	/* Take this opportunity to enforce any job memory limits */
3047
	/* Take this opportunity to enforce any job memory limits */
3048
	_enforce_job_mem_limit();
3048
	_enforce_job_mem_limit();
(-)a/src/slurmd/slurmd/slurmd.c (-3 / +3 lines)
Lines 381-387 main (int argc, char **argv) Link Here
381
	slurm_conf_install_fork_handlers();
381
	slurm_conf_install_fork_handlers();
382
	record_launched_jobs();
382
	record_launched_jobs();
383
383
384
	run_script_health_check();
384
	run_script_health_check(true);
385
385
386
	slurm_thread_create_detached(NULL, _registration_engine, NULL);
386
	slurm_thread_create_detached(NULL, _registration_engine, NULL);
387
387
Lines 2611-2617 static void _resource_spec_fini(void) Link Here
2611
 * Returns the run result. If the health check program
2611
 * Returns the run result. If the health check program
2612
 * is not defined, returns success immediately.
2612
 * is not defined, returns success immediately.
2613
 */
2613
 */
2614
extern int run_script_health_check(void)
2614
extern int run_script_health_check(bool startup)
2615
{
2615
{
2616
	int rc = SLURM_SUCCESS;
2616
	int rc = SLURM_SUCCESS;
2617
2617
Lines 2621-2627 extern int run_script_health_check(void) Link Here
2621
		setenvf(&env, "SLURMD_NODENAME", "%s", conf->node_name);
2621
		setenvf(&env, "SLURMD_NODENAME", "%s", conf->node_name);
2622
2622
2623
		rc = run_script("health_check", slurm_conf.health_check_program,
2623
		rc = run_script("health_check", slurm_conf.health_check_program,
2624
				0, 60, env, 0);
2624
				0, startup ? 1200 : 60, env, 0);
2625
2625
2626
		env_array_free(env);
2626
		env_array_free(env);
2627
	}
2627
	}
(-)a/src/slurmd/slurmd/slurmd.h (-2 / +1 lines)
Lines 170-176 int send_registration_msg(uint32_t status, bool startup); Link Here
170
int save_cred_state(slurm_cred_ctx_t vctx);
170
int save_cred_state(slurm_cred_ctx_t vctx);
171
171
172
/* Run the health check program if configured */
172
/* Run the health check program if configured */
173
int run_script_health_check(void);
173
int run_script_health_check(bool startup);
174
174
175
/* Handler for SIGTERM; can also be called to shutdown the slurmd. */
175
/* Handler for SIGTERM; can also be called to shutdown the slurmd. */
176
void slurmd_shutdown(int signum);
176
void slurmd_shutdown(int signum);
177
- 

Return to ticket 12004