View | Details | Raw Unified | Return to ticket 825
Collapse All | Expand All

(-)a/src/plugins/task/cgroup/task_cgroup_cpuset.c (-31 / +49 lines)
Lines 53-58 Link Here
53
#include "src/common/cpu_frequency.h"
53
#include "src/common/cpu_frequency.h"
54
#include "src/common/slurm_resource_info.h"
54
#include "src/common/slurm_resource_info.h"
55
#include "src/common/bitstring.h"
55
#include "src/common/bitstring.h"
56
#include "src/common/proc_args.h"
56
#include "src/common/xstring.h"
57
#include "src/common/xstring.h"
57
#include "src/common/xcgroup_read_config.h"
58
#include "src/common/xcgroup_read_config.h"
58
#include "src/common/xcgroup.h"
59
#include "src/common/xcgroup.h"
Lines 691-753 static int _task_cgroup_cpuset_dist_cyclic( Link Here
691
{
692
{
692
	hwloc_obj_t obj;
693
	hwloc_obj_t obj;
693
	uint32_t *obj_idx;
694
	uint32_t *obj_idx;
694
	uint32_t i, sock_idx, npskip, npdist, nsockets;
695
	uint32_t i, j, sock_idx, sock_loop, ntskip, npdist, nsockets;
695
	uint32_t taskid = job->envtp->localid;
696
	uint32_t taskid = job->envtp->localid;
696
697
697
	if (bind_verbose)
698
	if (bind_verbose)
698
		info("task/cgroup: task[%u] using cyclic distribution, "
699
		info("task/cgroup: task[%u] using %s distribution "
699
		     "task_dist %u", taskid, job->task_dist);
700
		     "(task_dist=%u)", taskid,
701
		     format_task_dist_states(job->task_dist), job->task_dist);
700
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
702
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
701
						       HWLOC_OBJ_SOCKET);
703
						       HWLOC_OBJ_SOCKET);
702
	obj_idx = xmalloc(nsockets * sizeof(uint32_t));
704
	obj_idx = xmalloc(nsockets * sizeof(uint32_t));
703
705
704
	if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
706
	if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
705
		/* cores or threads granularity */
707
		/* cores or threads granularity */
706
		npskip = taskid * job->cpus_per_task;
708
		ntskip = taskid;
707
		npdist = job->cpus_per_task;
709
		npdist = job->cpus_per_task;
708
	} else {
710
	} else {
709
		/* sockets or ldoms granularity */
711
		/* sockets or ldoms granularity */
710
		npskip = taskid;
712
		ntskip = taskid;
711
		npdist = 1;
713
		npdist = 1;
712
	}
714
	}
713
715
714
	/* skip objs for lower taskids */
716
	/* skip objs for lower taskids, then add them to the
715
	i = 0;
717
	   current task cpuset. To prevent an infinite loop, check
718
	   that we do not loop more than npdist times around the available
719
	   sockets, which is the worst case we should need to handle here. */
720
	i = 0; j = 0;
716
	sock_idx = 0;
721
	sock_idx = 0;
717
	while (i < npskip) {
722
	sock_loop = 0;
718
		while ((sock_idx < nsockets) && (i < npskip)) {
723
	while (i < ntskip + 1 && sock_loop < npdist + 1) {
724
		/* fill one or multiple sockets using block mode, unless
725
		   otherwise stated in the job->task_dist field */
726
		while ((sock_idx < nsockets) && (j < npdist)) {
719
			obj = hwloc_get_obj_below_by_type(
727
			obj = hwloc_get_obj_below_by_type(
720
				topology, HWLOC_OBJ_SOCKET, sock_idx,
728
				topology, HWLOC_OBJ_SOCKET, sock_idx,
721
				hwtype, obj_idx[sock_idx]);
729
				hwtype, obj_idx[sock_idx]);
722
			if (obj != NULL) {
730
			if (obj != NULL) {
723
				obj_idx[sock_idx]++;
731
				obj_idx[sock_idx]++;
724
				i++;
732
				j++;
733
				if (i == ntskip)
734
					_add_hwloc_cpuset(hwtype, req_hwtype,
735
							  obj, taskid,
736
							  bind_verbose, cpuset);
737
				if ((j < npdist) &&
738
				    ((job->task_dist ==
739
				      SLURM_DIST_CYCLIC_CFULL) ||
740
				     (job->task_dist ==
741
				      SLURM_DIST_BLOCK_CFULL)))
742
					sock_idx++;
743
			} else {
744
				sock_idx++;
725
			}
745
			}
726
			sock_idx++;
727
		}
746
		}
728
		if (i < npskip)
747
		/* if it succeeds, switch to the next task, starting
748
		   with the next available socket, otherwise, loop back
749
		   from the first socket trying to find available slots. */
750
		if (j == npdist) {
751
			i++; j = 0;
752
			sock_idx++; // no validity check, handled by the while
753
			sock_loop = 0;
754
		} else {
755
			sock_loop++;
729
			sock_idx = 0;
756
			sock_idx = 0;
730
	}
731
732
	/* distribute objs cyclically across sockets */
733
	i = npdist;
734
	while (i > 0) {
735
		while ((sock_idx < nsockets) && (i > 0)) {
736
			obj = hwloc_get_obj_below_by_type(
737
				topology, HWLOC_OBJ_SOCKET, sock_idx,
738
				hwtype, obj_idx[sock_idx]);
739
			if (obj != NULL) {
740
				obj_idx[sock_idx]++;
741
				_add_hwloc_cpuset(hwtype, req_hwtype, obj,
742
					    taskid, bind_verbose, cpuset);
743
				i--;
744
			}
745
			sock_idx++;
746
		}
757
		}
747
		sock_idx = 0;
748
	}
758
	}
759
749
	xfree(obj_idx);
760
	xfree(obj_idx);
750
	return XCGROUP_SUCCESS;
761
762
	/* should never happen in a normal scenario */
763
	if (sock_loop > npdist) {
764
		error("task/cgroup: task[%u] infinite loop broken while trying"
765
		      "to provision compute elements using %u", taskid,
766
		      format_task_dist_states(job->task_dist));
767
		return XCGROUP_ERROR;
768
	} else
769
		return XCGROUP_SUCCESS;
751
}
770
}
752
771
753
static int _task_cgroup_cpuset_dist_block(
772
static int _task_cgroup_cpuset_dist_block(
754
- 

Return to ticket 825