View | Details | Raw Unified | Return to ticket 13950
Collapse All | Expand All

(-)slurm-21.08.7.orig/src/common/cgroup.h (-1 / +2 lines)
Lines 126-132 typedef struct { Link Here
126
typedef struct {
126
typedef struct {
127
	uint64_t usec;
127
	uint64_t usec;
128
	uint64_t ssec;
128
	uint64_t ssec;
129
	uint64_t total_rss;
129
	uint64_t total_inactive_anon;
130
	uint64_t total_active_anon;
130
	uint64_t total_pgmajfault;
131
	uint64_t total_pgmajfault;
131
} cgroup_acct_t;
132
} cgroup_acct_t;
132
133
(-)slurm-21.08.7.orig/src/plugins/cgroup/v1/cgroup_v1.c (-3 / +6 lines)
Lines 1591-1604 extern cgroup_acct_t *cgroup_p_task_get_ Link Here
1591
	stats = xmalloc(sizeof(*stats));
1591
	stats = xmalloc(sizeof(*stats));
1592
	stats->usec = NO_VAL64;
1592
	stats->usec = NO_VAL64;
1593
	stats->ssec = NO_VAL64;
1593
	stats->ssec = NO_VAL64;
1594
	stats->total_rss = NO_VAL64;
1594
	stats->total_inactive_anon = NO_VAL64;
1595
	stats->total_active_anon = NO_VAL64;
1595
	stats->total_pgmajfault = NO_VAL64;
1596
	stats->total_pgmajfault = NO_VAL64;
1596
1597
1597
	if (cpu_time != NULL)
1598
	if (cpu_time != NULL)
1598
		sscanf(cpu_time, "%*s %lu %*s %lu", &stats->usec, &stats->ssec);
1599
		sscanf(cpu_time, "%*s %lu %*s %lu", &stats->usec, &stats->ssec);
1599
1600
1600
	if ((ptr = xstrstr(memory_stat, "total_rss")))
1601
	if ((ptr = xstrstr(memory_stat, "total_inactive_anon")))
1601
		sscanf(ptr, "total_rss %lu", &stats->total_rss);
1602
		sscanf(ptr, "total_inactive_anon %lu", &stats->total_inactive_anon);
1603
	if ((ptr = xstrstr(memory_stat, "total_active_anon")))
1604
		sscanf(ptr, "total_active_anon %lu", &stats->total_active_anon);
1602
	if ((ptr = xstrstr(memory_stat, "total_pgmajfault")))
1605
	if ((ptr = xstrstr(memory_stat, "total_pgmajfault")))
1603
		sscanf(ptr, "total_pgmajfault %lu", &stats->total_pgmajfault);
1606
		sscanf(ptr, "total_pgmajfault %lu", &stats->total_pgmajfault);
1604
1607
(-)slurm-21.08.7.orig/src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.c (-6 / +7 lines)
Lines 105-123 static void _prec_extra(jag_prec_t *prec Link Here
105
		prec->ssec = cgroup_acct_data->ssec;
105
		prec->ssec = cgroup_acct_data->ssec;
106
	}
106
	}
107
107
108
	if (cgroup_acct_data->total_rss == NO_VAL64 &&
108
	if (cgroup_acct_data->total_inactive_anon == NO_VAL64 &&
109
	    cgroup_acct_data->total_active_anon == NO_VAL64 &&
109
	    cgroup_acct_data->total_pgmajfault == NO_VAL64) {
110
	    cgroup_acct_data->total_pgmajfault == NO_VAL64) {
110
		debug2("failed to collect cgroup memory stats pid %d ppid %d",
111
		debug2("failed to collect cgroup memory stats pid %d ppid %d",
111
		       prec->pid, prec->ppid);
112
		       prec->pid, prec->ppid);
112
	} else {
113
	} else {
113
		/*
114
		/*
114
		 * This number represents the amount of "dirty" private memory
115
		 * Use rss+tmpfs instead of just rss. This means we include /dev/shm etc. usage.
115
		 * used by the cgroup.  From our experience this is slightly
116
		 * rss + tmpfs == total_inactive_anon + total_active_anon
116
		 * different than what proc presents, but is probably more
117
		 * See:
117
		 * accurate on what the user is actually using.
118
		 *    https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-memory
118
		 */
119
		 */
119
		prec->tres_data[TRES_ARRAY_MEM].size_read =
120
		prec->tres_data[TRES_ARRAY_MEM].size_read =
120
			cgroup_acct_data->total_rss;
121
			cgroup_acct_data->total_inactive_anon + cgroup_acct_data->total_active_anon;
121
122
122
		/*
123
		/*
123
		 * total_pgmajfault is what is reported in proc, so we use
124
		 * total_pgmajfault is what is reported in proc, so we use

Return to ticket 13950