View | Details | Raw Unified | Return to ticket 445 | Differences between
and this patch

Collapse All | Expand All

(-)a/NEWS (+3 lines)
Lines 38-43 documents those changes that are of interest to users and admins. Link Here
38
    can only be a TORUS (1).
38
    can only be a TORUS (1).
39
 -- Change max message length from 100MB to 1GB before generating "Insane
39
 -- Change max message length from 100MB to 1GB before generating "Insane
40
    message length" error.
40
    message length" error.
41
 -- sched/backfill - Prevent possible memory corruption due to use of
42
    bf_continue option and long-running scheduling cycle (pending jobs could
43
    have been cancelled and purged).
41
44
42
* Changes in Slurm 2.6.2
45
* Changes in Slurm 2.6.2
43
========================
46
========================
(-)a/src/plugins/sched/backfill/backfill.c (-6 / +7 lines)
Lines 639-650 static int _attempt_backfill(void) Link Here
639
	}
639
	}
640
	while ((job_queue_rec = (job_queue_rec_t *)
640
	while ((job_queue_rec = (job_queue_rec_t *)
641
				list_pop_bottom(job_queue, sort_job_queue2))) {
641
				list_pop_bottom(job_queue, sort_job_queue2))) {
642
		job_ptr  = job_queue_rec->job_ptr;
643
		orig_time_limit = job_ptr->time_limit;
644
645
		if ((time(NULL) - sched_start) >= sched_timeout) {
642
		if ((time(NULL) - sched_start) >= sched_timeout) {
646
			uint32_t save_time_limit = job_ptr->time_limit;
647
			job_ptr->time_limit = orig_time_limit;
648
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
643
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
649
				END_TIMER;
644
				END_TIMER;
650
				info("backfill: completed yielding locks "
645
				info("backfill: completed yielding locks "
Lines 660-672 static int _attempt_backfill(void) Link Here
660
				rc = 1;
655
				rc = 1;
661
				break;
656
				break;
662
			}
657
			}
663
			job_ptr->time_limit = save_time_limit;
664
			/* Reset backfill scheduling timers, resume testing */
658
			/* Reset backfill scheduling timers, resume testing */
665
			sched_start = time(NULL);
659
			sched_start = time(NULL);
666
			job_test_count = 0;
660
			job_test_count = 0;
667
			START_TIMER;
661
			START_TIMER;
668
		}
662
		}
669
663
664
		job_ptr  = job_queue_rec->job_ptr;
665
		/* With bf_continue configured, the original job could have
666
		 * been cancelled and purged. Validate pointer here. */
667
		if ((job_ptr->magic  != JOB_MAGIC) ||
668
		    (job_ptr->job_id != job_queue_rec->job_id))
669
			continue;
670
		orig_time_limit = job_ptr->time_limit;
670
		part_ptr = job_queue_rec->part_ptr;
671
		part_ptr = job_queue_rec->part_ptr;
671
		job_test_count++;
672
		job_test_count++;
672
673
(-)a/src/slurmctld/job_scheduler.c (+1 lines)
Lines 140-145 static void _job_queue_append(List job_queue, struct job_record *job_ptr, Link Here
140
	job_queue_rec_t *job_queue_rec;
140
	job_queue_rec_t *job_queue_rec;
141
141
142
	job_queue_rec = xmalloc(sizeof(job_queue_rec_t));
142
	job_queue_rec = xmalloc(sizeof(job_queue_rec_t));
143
	job_queue_rec->job_id   = job_ptr->job_id;
143
	job_queue_rec->job_ptr  = job_ptr;
144
	job_queue_rec->job_ptr  = job_ptr;
144
	job_queue_rec->part_ptr = part_ptr;
145
	job_queue_rec->part_ptr = part_ptr;
145
	job_queue_rec->priority = prio;
146
	job_queue_rec->priority = prio;
(-)a/src/slurmctld/job_scheduler.h (-1 / +1 lines)
Lines 45-50 Link Here
45
#include "src/slurmctld/slurmctld.h"
45
#include "src/slurmctld/slurmctld.h"
46
46
47
typedef struct job_queue_rec {
47
typedef struct job_queue_rec {
48
	uint32_t job_id;
48
	struct job_record *job_ptr;
49
	struct job_record *job_ptr;
49
	struct part_record *part_ptr;
50
	struct part_record *part_ptr;
50
	uint32_t priority;
51
	uint32_t priority;
51
- 

Return to ticket 445