|
Lines 195-201
static void _reset_step_bitmaps(struct job_record *job_ptr);
Link Here
|
| 195 |
static int _resume_job_nodes(struct job_record *job_ptr, bool indf_susp); |
195 |
static int _resume_job_nodes(struct job_record *job_ptr, bool indf_susp); |
| 196 |
static void _send_job_kill(struct job_record *job_ptr); |
196 |
static void _send_job_kill(struct job_record *job_ptr); |
| 197 |
static int _set_job_id(struct job_record *job_ptr); |
197 |
static int _set_job_id(struct job_record *job_ptr); |
| 198 |
static void _signal_batch_job(struct job_record *job_ptr, uint16_t signal); |
198 |
static void _signal_batch_job(struct job_record *job_ptr, |
|
|
199 |
uint16_t signal, |
| 200 |
uint16_t flags); |
| 199 |
static void _signal_job(struct job_record *job_ptr, int signal); |
201 |
static void _signal_job(struct job_record *job_ptr, int signal); |
| 200 |
static void _suspend_job(struct job_record *job_ptr, uint16_t op, |
202 |
static void _suspend_job(struct job_record *job_ptr, uint16_t op, |
| 201 |
bool indf_susp); |
203 |
bool indf_susp); |
|
Lines 3680-3687
extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags,
Link Here
|
| 3680 |
} |
3682 |
} |
| 3681 |
|
3683 |
|
| 3682 |
if (IS_JOB_RUNNING(job_ptr)) { |
3684 |
if (IS_JOB_RUNNING(job_ptr)) { |
| 3683 |
if (signal == SIGKILL) { |
3685 |
if ((signal == SIGKILL) |
| 3684 |
/* No need to signal steps, deallocate kills them */ |
3686 |
&& !(flags & KILL_STEPS_ONLY) |
|
|
3687 |
&& !(flags & KILL_JOB_BATCH)) { |
| 3688 |
/* No need to signal steps, deallocate kills them |
| 3689 |
*/ |
| 3685 |
job_ptr->time_last_active = now; |
3690 |
job_ptr->time_last_active = now; |
| 3686 |
job_ptr->end_time = now; |
3691 |
job_ptr->end_time = now; |
| 3687 |
last_job_update = now; |
3692 |
last_job_update = now; |
|
Lines 3689-3699
extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags,
Link Here
|
| 3689 |
build_cg_bitmap(job_ptr); |
3694 |
build_cg_bitmap(job_ptr); |
| 3690 |
job_completion_logger(job_ptr, false); |
3695 |
job_completion_logger(job_ptr, false); |
| 3691 |
deallocate_nodes(job_ptr, false, false, preempt); |
3696 |
deallocate_nodes(job_ptr, false, false, preempt); |
| 3692 |
} else if (flags & KILL_JOB_BATCH) {// |
3697 |
} else if (job_ptr->batch_flag |
| 3693 |
if (job_ptr->batch_flag) |
3698 |
&& (flags & KILL_STEPS_ONLY |
| 3694 |
_signal_batch_job(job_ptr, signal); |
3699 |
|| flags & KILL_JOB_BATCH)) { |
| 3695 |
else |
3700 |
_signal_batch_job(job_ptr, signal, flags); |
| 3696 |
return ESLURM_JOB_SCRIPT_MISSING; |
3701 |
} else if ((flags & KILL_JOB_BATCH) && !job_ptr->batch_flag) { |
|
|
3702 |
return ESLURM_JOB_SCRIPT_MISSING; |
| 3697 |
} else { |
3703 |
} else { |
| 3698 |
_signal_job(job_ptr, signal); |
3704 |
_signal_job(job_ptr, signal); |
| 3699 |
} |
3705 |
} |
|
Lines 3708-3714
extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags,
Link Here
|
| 3708 |
} |
3714 |
} |
| 3709 |
|
3715 |
|
| 3710 |
static void |
3716 |
static void |
| 3711 |
_signal_batch_job(struct job_record *job_ptr, uint16_t signal) |
3717 |
_signal_batch_job(struct job_record *job_ptr, uint16_t signal, uint16_t flags) |
| 3712 |
{ |
3718 |
{ |
| 3713 |
bitoff_t i; |
3719 |
bitoff_t i; |
| 3714 |
kill_tasks_msg_t *kill_tasks_msg = NULL; |
3720 |
kill_tasks_msg_t *kill_tasks_msg = NULL; |
|
Lines 3719-3726
_signal_batch_job(struct job_record *job_ptr, uint16_t signal)
Link Here
|
| 3719 |
xassert(job_ptr->batch_host); |
3725 |
xassert(job_ptr->batch_host); |
| 3720 |
i = bit_ffs(job_ptr->node_bitmap); |
3726 |
i = bit_ffs(job_ptr->node_bitmap); |
| 3721 |
if (i < 0) { |
3727 |
if (i < 0) { |
| 3722 |
error("_signal_batch_job JobId=%u lacks assigned nodes", |
3728 |
error("%s: JobId=%u lacks assigned nodes", |
| 3723 |
job_ptr->job_id); |
3729 |
__func__, job_ptr->job_id); |
|
|
3730 |
return; |
| 3731 |
} |
| 3732 |
if (flags > 0xf) { /* Top 4 bits used for KILL_* flags */ |
| 3733 |
error("%s: signal flags %u for job %u exceed limit", |
| 3734 |
__func__, flags, job_ptr->job_id); |
| 3735 |
return; |
| 3736 |
} |
| 3737 |
if (signal > 0xfff) { /* Top 4 bits used for KILL_* flags */ |
| 3738 |
error("%s: signal value %u for job %u exceed limit", |
| 3739 |
__func__, signal, job_ptr->job_id); |
| 3724 |
return; |
3740 |
return; |
| 3725 |
} |
3741 |
} |
| 3726 |
|
3742 |
|
|
Lines 3741-3753
_signal_batch_job(struct job_record *job_ptr, uint16_t signal)
Link Here
|
| 3741 |
kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t)); |
3757 |
kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t)); |
| 3742 |
kill_tasks_msg->job_id = job_ptr->job_id; |
3758 |
kill_tasks_msg->job_id = job_ptr->job_id; |
| 3743 |
kill_tasks_msg->job_step_id = NO_VAL; |
3759 |
kill_tasks_msg->job_step_id = NO_VAL; |
| 3744 |
/* Encode the KILL_JOB_BATCH flag for |
3760 |
|
| 3745 |
* stepd to know if has to signal only |
3761 |
/* Encode the KILL_JOB_BATCH|KILL_STEPS_ONLY flags so stepd knows whether it
| 3746 |
* the batch script. The job was submitted |
3762 |
* has to signal only the batch script or only the steps. |
| 3747 |
* using the --signal=B:sig sbatch option. |
3763 |
* The job was submitted using the --signal=B:sig |
|
|
3764 |
* sbatch option, or without the B: prefix.
| 3748 |
*/ |
3765 |
*/ |
| 3749 |
z = KILL_JOB_BATCH << 24; |
3766 |
if (flags == KILL_JOB_BATCH) |
| 3750 |
kill_tasks_msg->signal = z|signal; |
3767 |
z = KILL_JOB_BATCH << 24; |
|
|
3768 |
else if (flags == KILL_STEPS_ONLY) |
| 3769 |
z = KILL_STEPS_ONLY << 24; |
| 3770 |
|
| 3771 |
kill_tasks_msg->signal = z | signal; |
| 3751 |
|
3772 |
|
| 3752 |
agent_args->msg_args = kill_tasks_msg; |
3773 |
agent_args->msg_args = kill_tasks_msg; |
| 3753 |
agent_args->node_count = 1;/* slurm/477 be sure to update node_count */ |
3774 |
agent_args->node_count = 1;/* slurm/477 be sure to update node_count */ |
|
Lines 5969-5974
void job_time_limit(void)
Link Here
|
| 5969 |
if ((job_ptr->warn_time) && |
5990 |
if ((job_ptr->warn_time) && |
| 5970 |
(job_ptr->warn_time + PERIODIC_TIMEOUT + now >= |
5991 |
(job_ptr->warn_time + PERIODIC_TIMEOUT + now >= |
| 5971 |
job_ptr->end_time)) { |
5992 |
job_ptr->end_time)) { |
|
|
5993 |
|
| 5994 |
/* If --signal B option was not specified, |
| 5995 |
* signal only the steps but not the batch step. |
| 5996 |
*/ |
| 5997 |
if (job_ptr->warn_flags == 0) |
| 5998 |
job_ptr->warn_flags = KILL_STEPS_ONLY; |
| 5999 |
|
| 6000 |
|
| 5972 |
debug("Warning signal %u to job %u ", |
6001 |
debug("Warning signal %u to job %u ", |
| 5973 |
job_ptr->warn_signal, job_ptr->job_id); |
6002 |
job_ptr->warn_signal, job_ptr->job_id); |
| 5974 |
(void) job_signal(job_ptr->job_id, |
6003 |
(void) job_signal(job_ptr->job_id, |