View | Details | Raw Unified | Return to ticket 15280 | Differences between
and this patch

Collapse All | Expand All

(-)a/src/slurmctld/step_mgr.c (-15 / +23 lines)
Lines 2012-2044 static bool _handle_core_select(step_record_t *step_ptr, Link Here
2012
		}
2012
		}
2013
	} else { /* SLURM_DIST_SOCKCYCLIC */
2013
	} else { /* SLURM_DIST_SOCKCYCLIC */
2014
		int task_alloc_cpus = 0;
2014
		int task_alloc_cpus = 0;
2015
		int start_core = 0;
2015
		int *next_core = xcalloc(sockets, sizeof(int));
2016
		int last_core = 0;
2016
		bool nothing_allocated = false;
2017
		for (int j=0; j < bit_set_count(avail_core_bitmap); j++) {
2017
		while (!nothing_allocated) {
2018
			for (sock_inx=0; sock_inx < sockets; sock_inx++) {
2018
			nothing_allocated = true;
2019
				for (i = start_core; i < cores; i++) {
2019
			for (sock_inx = 0; sock_inx < sockets; sock_inx++) {
2020
				for (i = next_core[sock_inx]; i < cores;
2021
				     i++) {
2020
					if (oversubscribing_cpus)
2022
					if (oversubscribing_cpus)
2021
						core_inx = (last_core_inx + i) % cores;
2023
						core_inx = (last_core_inx + i) %
2024
							cores;
2022
					else
2025
					else
2023
						core_inx = i;
2026
						core_inx = i;
2024
					if (!_pick_step_core(step_ptr, job_resrcs_ptr,
2027
2028
					next_core[sock_inx] = i + 1;
2029
					if (!_pick_step_core(step_ptr,
2030
							     job_resrcs_ptr,
2025
							     avail_core_bitmap,
2031
							     avail_core_bitmap,
2026
							     job_node_inx, sock_inx,
2032
							     job_node_inx,
2027
							     core_inx, use_all_cores,
2033
							     sock_inx,
2028
							     oversubscribing_cpus))
2034
							     core_inx,
2035
							     use_all_cores,
2036
							oversubscribing_cpus))
2029
						continue;
2037
						continue;
2030
					if (--(*cpu_cnt) == 0)
2038
					nothing_allocated = false;
2039
					if (--(*cpu_cnt) == 0) {
2040
						xfree(next_core);
2031
						return true;
2041
						return true;
2042
					}
2032
					if (++task_alloc_cpus ==
2043
					if (++task_alloc_cpus ==
2033
					    cpus_per_task) {
2044
					    cpus_per_task) {
2034
						task_alloc_cpus = 0;
2045
						task_alloc_cpus = 0;
2035
						last_core = i;
2036
						break;
2046
						break;
2037
					}
2047
					}
2038
				}
2048
				}
2039
			}
2049
			}
2040
			start_core = last_core + 1;
2041
		}
2050
		}
2051
		xfree(next_core);
2042
	}
2052
	}
2043
	return false;
2053
	return false;
2044
}
2054
}
2045
- 
2046
--
2047
NEWS | 2 ++
2055
NEWS | 2 ++
2048
1 file changed, 2 insertions(+)
2056
1 file changed, 2 insertions(+)
(-)a/NEWS (-1 / +2 lines)
Lines 12-17 documents those changes that are of interest to users and administrators. Link Here
12
 -- Clarify error message when --send-libs=yes or BcastParameters=send_libs
12
 -- Clarify error message when --send-libs=yes or BcastParameters=send_libs
13
    fails to identify shared library files, and avoid creating an empty
13
    fails to identify shared library files, and avoid creating an empty
14
    "<filename>_libs" directory on the target filesystem.
14
    "<filename>_libs" directory on the target filesystem.
15
 -- Fix bug in core selection for the default cyclic distribution of tasks
16
    across sockets, which resulted in random task launch failures.
15
17
16
* Changes in Slurm 22.05.5
18
* Changes in Slurm 22.05.5
17
==========================
19
==========================
18
- 

Return to ticket 15280