|
Lines 1027-1033
static uint16_t _add_job_to_part(struct gs_part *p_ptr,
Link Here
|
| 1027 |
p_ptr->job_list[p_ptr->num_jobs++] = j_ptr; |
1027 |
p_ptr->job_list[p_ptr->num_jobs++] = j_ptr; |
| 1028 |
|
1028 |
|
| 1029 |
/* determine the immediate fate of this job (run or suspend) */ |
1029 |
/* determine the immediate fate of this job (run or suspend) */ |
| 1030 |
if (_job_fits_in_active_row(job_ptr, p_ptr)) { |
1030 |
if (!IS_JOB_SUSPENDED(job_ptr) && |
|
|
1031 |
_job_fits_in_active_row(job_ptr, p_ptr)) { |
| 1031 |
if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) { |
1032 |
if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) { |
| 1032 |
info("gang: _add_job_to_part: job %u remains running", |
1033 |
info("gang: _add_job_to_part: job %u remains running", |
| 1033 |
job_ptr->job_id); |
1034 |
job_ptr->job_id); |
|
Lines 1110-1127
static void _scan_slurm_job_list(void)
Link Here
|
| 1110 |
/* We're not tracking this job. Resume it if it's |
1111 |
/* We're not tracking this job. Resume it if it's |
| 1111 |
* suspended, and then add it to the job list. */ |
1112 |
* suspended, and then add it to the job list. */ |
| 1112 |
|
1113 |
|
| 1113 |
if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) { |
|
|
| 1114 |
/* The likely scenario here is that the slurmctld |
| 1115 |
* failed over, and this is a job that gang |
| 1116 |
* had previously suspended. It's not possible |
| 1117 |
* to determine the previous order of jobs |
| 1118 |
* without preserving gang state, which is not |
| 1119 |
* worth the extra infrastructure. Just resume |
| 1120 |
* the job and then add it to the job list. |
| 1121 |
*/ |
| 1122 |
_resume_job(job_ptr->job_id); |
| 1123 |
} |
| 1124 |
|
| 1125 |
_add_job_to_part(p_ptr, job_ptr); |
1114 |
_add_job_to_part(p_ptr, job_ptr); |
| 1126 |
continue; |
1115 |
continue; |
| 1127 |
} |
1116 |
} |
|
Lines 1443-1458
extern int gs_reconfig(void)
Link Here
|
| 1443 |
/* job no longer exists in SLURM, so drop it */ |
1432 |
/* job no longer exists in SLURM, so drop it */ |
| 1444 |
continue; |
1433 |
continue; |
| 1445 |
} |
1434 |
} |
| 1446 |
/* resume any job that is suspended by us */ |
1435 |
if (IS_JOB_SUSPENDED(job_ptr) && |
| 1447 |
if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) { |
1436 |
(job_ptr->priority == 0)) |
| 1448 |
if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG){ |
1437 |
continue; /* not suspended by gang */ |
| 1449 |
info("resuming job %u apparently " |
|
|
| 1450 |
"suspended by gang", |
| 1451 |
job_ptr->job_id); |
| 1452 |
} |
| 1453 |
_resume_job(job_ptr->job_id); |
| 1454 |
} |
| 1455 |
|
| 1456 |
/* transfer the job as long as it is still active */ |
1438 |
/* transfer the job as long as it is still active */ |
| 1457 |
if (IS_JOB_SUSPENDED(job_ptr) || |
1439 |
if (IS_JOB_SUSPENDED(job_ptr) || |
| 1458 |
IS_JOB_RUNNING(job_ptr)) { |
1440 |
IS_JOB_RUNNING(job_ptr)) { |