View | Details | Raw Unified | Return to ticket 2274
Collapse All | Expand All

(-)a/src/slurmctld/gang.c (-23 / +5 lines)
Lines 1027-1033 static uint16_t _add_job_to_part(struct gs_part *p_ptr, Link Here
1027
	p_ptr->job_list[p_ptr->num_jobs++] = j_ptr;
1027
	p_ptr->job_list[p_ptr->num_jobs++] = j_ptr;
1028
1028
1029
	/* determine the immediate fate of this job (run or suspend) */
1029
	/* determine the immediate fate of this job (run or suspend) */
1030
	if (_job_fits_in_active_row(job_ptr, p_ptr)) {
1030
	if (!IS_JOB_SUSPENDED(job_ptr) &&
1031
	    _job_fits_in_active_row(job_ptr, p_ptr)) {
1031
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
1032
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
1032
			info("gang: _add_job_to_part: job %u remains running",
1033
			info("gang: _add_job_to_part: job %u remains running",
1033
			     job_ptr->job_id);
1034
			     job_ptr->job_id);
Lines 1110-1127 static void _scan_slurm_job_list(void) Link Here
1110
			/* We're not tracking this job. Resume it if it's
1111
			/* We're not tracking this job. Resume it if it's
1111
			 * suspended, and then add it to the job list. */
1112
			 * suspended, and then add it to the job list. */
1112
1113
1113
			if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) {
1114
				/* The likely scenario here is that the controller
1115
				 * failed over, and this is a job that gang
1116
				 * had previously suspended. It's not possible
1117
				 * to determine the previous order of jobs
1118
				 * without preserving gang state, which is not
1119
				 * worth the extra infrastructure. Just resume
1120
				 * the job and then add it to the job list.
1121
				 */
1122
				_resume_job(job_ptr->job_id);
1123
			}
1124
1125
			_add_job_to_part(p_ptr, job_ptr);
1114
			_add_job_to_part(p_ptr, job_ptr);
1126
			continue;
1115
			continue;
1127
		}
1116
		}
Lines 1443-1458 extern int gs_reconfig(void) Link Here
1443
				/* job no longer exists in SLURM, so drop it */
1432
				/* job no longer exists in SLURM, so drop it */
1444
				continue;
1433
				continue;
1445
			}
1434
			}
1446
			/* resume any job that is suspended by us */
1435
			if (IS_JOB_SUSPENDED(job_ptr) &&
1447
			if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) {
1436
			    (job_ptr->priority == 0))
1448
				if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG){
1437
				continue;	/* not suspended by gang */
1449
					info("resuming job %u apparently "
1450
					     "suspended by gang",
1451
					     job_ptr->job_id);
1452
				}
1453
				_resume_job(job_ptr->job_id);
1454
			}
1455
1456
			/* transfer the job as long as it is still active */
1438
			/* transfer the job as long as it is still active */
1457
			if (IS_JOB_SUSPENDED(job_ptr) ||
1439
			if (IS_JOB_SUSPENDED(job_ptr) ||
1458
			    IS_JOB_RUNNING(job_ptr)) {
1440
			    IS_JOB_RUNNING(job_ptr)) {

Return to ticket 2274