|
Lines 53-58
Link Here
|
| 53 |
#include "src/common/cpu_frequency.h" |
53 |
#include "src/common/cpu_frequency.h" |
| 54 |
#include "src/common/slurm_resource_info.h" |
54 |
#include "src/common/slurm_resource_info.h" |
| 55 |
#include "src/common/bitstring.h" |
55 |
#include "src/common/bitstring.h" |
|
|
56 |
#include "src/common/proc_args.h" |
| 56 |
#include "src/common/xstring.h" |
57 |
#include "src/common/xstring.h" |
| 57 |
#include "src/common/xcgroup_read_config.h" |
58 |
#include "src/common/xcgroup_read_config.h" |
| 58 |
#include "src/common/xcgroup.h" |
59 |
#include "src/common/xcgroup.h" |
|
Lines 691-753
static int _task_cgroup_cpuset_dist_cyclic(
Link Here
|
| 691 |
{ |
692 |
{ |
| 692 |
hwloc_obj_t obj; |
693 |
hwloc_obj_t obj; |
| 693 |
uint32_t *obj_idx; |
694 |
uint32_t *obj_idx; |
| 694 |
uint32_t i, sock_idx, npskip, npdist, nsockets; |
695 |
uint32_t i, j, sock_idx, sock_loop, ntskip, npdist, nsockets; |
| 695 |
uint32_t taskid = job->envtp->localid; |
696 |
uint32_t taskid = job->envtp->localid; |
| 696 |
|
697 |
|
| 697 |
if (bind_verbose) |
698 |
if (bind_verbose) |
| 698 |
info("task/cgroup: task[%u] using cyclic distribution, " |
699 |
info("task/cgroup: task[%u] using %s distribution " |
| 699 |
"task_dist %u", taskid, job->task_dist); |
700 |
"(task_dist=%u)", taskid, |
|
|
701 |
format_task_dist_states(job->task_dist), job->task_dist); |
| 700 |
nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology, |
702 |
nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology, |
| 701 |
HWLOC_OBJ_SOCKET); |
703 |
HWLOC_OBJ_SOCKET); |
| 702 |
obj_idx = xmalloc(nsockets * sizeof(uint32_t)); |
704 |
obj_idx = xmalloc(nsockets * sizeof(uint32_t)); |
| 703 |
|
705 |
|
| 704 |
if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) { |
706 |
if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) { |
| 705 |
/* cores or threads granularity */ |
707 |
/* cores or threads granularity */ |
| 706 |
npskip = taskid * job->cpus_per_task; |
708 |
ntskip = taskid; |
| 707 |
npdist = job->cpus_per_task; |
709 |
npdist = job->cpus_per_task; |
| 708 |
} else { |
710 |
} else { |
| 709 |
/* sockets or ldoms granularity */ |
711 |
/* sockets or ldoms granularity */ |
| 710 |
npskip = taskid; |
712 |
ntskip = taskid; |
| 711 |
npdist = 1; |
713 |
npdist = 1; |
| 712 |
} |
714 |
} |
| 713 |
|
715 |
|
| 714 |
/* skip objs for lower taskids */ |
716 |
/* skip objs for lower taskids, then add them to the |
| 715 |
i = 0; |
717 |
current task cpuset. To prevent an infinite loop, check
|
|
718 |
that we do not loop more than npdist times around the available |
| 719 |
sockets, which is the worst scenario we should afford here. */ |
| 720 |
i = 0; j = 0; |
| 716 |
sock_idx = 0; |
721 |
sock_idx = 0; |
| 717 |
while (i < npskip) { |
722 |
sock_loop = 0; |
| 718 |
while ((sock_idx < nsockets) && (i < npskip)) { |
723 |
while (i < ntskip + 1 && sock_loop < npdist + 1) { |
|
|
724 |
/* fill one or multiple sockets using block mode, unless |
| 725 |
otherwise stated in the job->task_dist field */ |
| 726 |
while ((sock_idx < nsockets) && (j < npdist)) { |
| 719 |
obj = hwloc_get_obj_below_by_type( |
727 |
obj = hwloc_get_obj_below_by_type( |
| 720 |
topology, HWLOC_OBJ_SOCKET, sock_idx, |
728 |
topology, HWLOC_OBJ_SOCKET, sock_idx, |
| 721 |
hwtype, obj_idx[sock_idx]); |
729 |
hwtype, obj_idx[sock_idx]); |
| 722 |
if (obj != NULL) { |
730 |
if (obj != NULL) { |
| 723 |
obj_idx[sock_idx]++; |
731 |
obj_idx[sock_idx]++; |
| 724 |
i++; |
732 |
j++; |
|
|
733 |
if (i == ntskip) |
| 734 |
_add_hwloc_cpuset(hwtype, req_hwtype, |
| 735 |
obj, taskid, |
| 736 |
bind_verbose, cpuset); |
| 737 |
if ((j < npdist) && |
| 738 |
((job->task_dist == |
| 739 |
SLURM_DIST_CYCLIC_CFULL) || |
| 740 |
(job->task_dist == |
| 741 |
SLURM_DIST_BLOCK_CFULL))) |
| 742 |
sock_idx++; |
| 743 |
} else { |
| 744 |
sock_idx++; |
| 725 |
} |
745 |
} |
| 726 |
sock_idx++; |
|
|
| 727 |
} |
746 |
} |
| 728 |
if (i < npskip) |
747 |
/* if it succeeds, switch to the next task, starting
|
|
748 |
with the next available socket, otherwise, loop back |
| 749 |
from the first socket trying to find available slots. */ |
| 750 |
if (j == npdist) { |
| 751 |
i++; j = 0; |
| 752 |
sock_idx++; // no validity check, handled by the while |
| 753 |
sock_loop = 0; |
| 754 |
} else { |
| 755 |
sock_loop++; |
| 729 |
sock_idx = 0; |
756 |
sock_idx = 0; |
| 730 |
} |
|
|
| 731 |
|
| 732 |
/* distribute objs cyclically across sockets */ |
| 733 |
i = npdist; |
| 734 |
while (i > 0) { |
| 735 |
while ((sock_idx < nsockets) && (i > 0)) { |
| 736 |
obj = hwloc_get_obj_below_by_type( |
| 737 |
topology, HWLOC_OBJ_SOCKET, sock_idx, |
| 738 |
hwtype, obj_idx[sock_idx]); |
| 739 |
if (obj != NULL) { |
| 740 |
obj_idx[sock_idx]++; |
| 741 |
_add_hwloc_cpuset(hwtype, req_hwtype, obj, |
| 742 |
taskid, bind_verbose, cpuset); |
| 743 |
i--; |
| 744 |
} |
| 745 |
sock_idx++; |
| 746 |
} |
757 |
} |
| 747 |
sock_idx = 0; |
|
|
| 748 |
} |
758 |
} |
|
|
759 |
|
| 749 |
xfree(obj_idx); |
760 |
xfree(obj_idx); |
| 750 |
return XCGROUP_SUCCESS; |
761 |
|
|
|
762 |
/* should never happen in a normal scenario */
| 763 |
if (sock_loop > npdist) { |
| 764 |
error("task/cgroup: task[%u] infinite loop broken while trying" |
| 765 |
"to provision compute elements using %u", taskid, |
| 766 |
format_task_dist_states(job->task_dist)); |
| 767 |
return XCGROUP_ERROR; |
| 768 |
} else |
| 769 |
return XCGROUP_SUCCESS; |
| 751 |
} |
770 |
} |
| 752 |
|
771 |
|
| 753 |
static int _task_cgroup_cpuset_dist_block( |
772 |
static int _task_cgroup_cpuset_dist_block( |
| 754 |
- |
|
|