|
Lines 1082-1089
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1082 |
xassert(job_gres_ptr); |
1082 |
xassert(job_gres_ptr); |
| 1083 |
xassert(node_gres_ptr); |
1083 |
xassert(node_gres_ptr); |
| 1084 |
|
1084 |
|
| 1085 |
if (node_gres_ptr->no_consume) |
1085 |
log_flag(GRES, "%s:%d: JobId=%u, NodeName=%s", __func__, __LINE__, job_id, node_name); |
|
|
1086 |
if (node_gres_ptr->no_consume) { |
| 1087 |
log_flag(GRES, "%s:%d: It's no_consume now", __func__, __LINE__); |
| 1086 |
return SLURM_SUCCESS; |
1088 |
return SLURM_SUCCESS; |
|
|
1089 |
} |
| 1087 |
|
1090 |
|
| 1088 |
if (job_gres_ptr->node_cnt <= node_offset) { |
1091 |
if (job_gres_ptr->node_cnt <= node_offset) { |
| 1089 |
error("gres/%s: job %u dealloc of node %s bad node_offset %d " |
1092 |
error("gres/%s: job %u dealloc of node %s bad node_offset %d " |
|
Lines 1093-1098
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1093 |
} |
1096 |
} |
| 1094 |
|
1097 |
|
| 1095 |
if (gres_id_shared(plugin_id)) { |
1098 |
if (gres_id_shared(plugin_id)) { |
|
|
1099 |
log_flag(GRES, "%s:%d: Shared GRES", __func__, __LINE__); |
| 1096 |
gres_per_bit = job_gres_ptr->gres_per_node; |
1100 |
gres_per_bit = job_gres_ptr->gres_per_node; |
| 1097 |
xassert(gres_per_bit); |
1101 |
xassert(gres_per_bit); |
| 1098 |
} |
1102 |
} |
|
Lines 1100-1105
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1100 |
xfree(node_gres_ptr->gres_used); /* Clear cache */ |
1104 |
xfree(node_gres_ptr->gres_used); /* Clear cache */ |
| 1101 |
if (node_gres_ptr->gres_bit_alloc && job_gres_ptr->gres_bit_alloc && |
1105 |
if (node_gres_ptr->gres_bit_alloc && job_gres_ptr->gres_bit_alloc && |
| 1102 |
job_gres_ptr->gres_bit_alloc[node_offset]) { |
1106 |
job_gres_ptr->gres_bit_alloc[node_offset]) { |
|
|
1107 |
log_flag(GRES, "%s:%d: Job and node have gres_bit_alloc", __func__, __LINE__); |
| 1103 |
len = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]); |
1108 |
len = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]); |
| 1104 |
i = bit_size(node_gres_ptr->gres_bit_alloc); |
1109 |
i = bit_size(node_gres_ptr->gres_bit_alloc); |
| 1105 |
if (i != len) { |
1110 |
if (i != len) { |
|
Lines 1133-1154
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1133 |
} |
1138 |
} |
| 1134 |
} else if (job_gres_ptr->gres_cnt_node_alloc) { |
1139 |
} else if (job_gres_ptr->gres_cnt_node_alloc) { |
| 1135 |
gres_cnt = job_gres_ptr->gres_cnt_node_alloc[node_offset]; |
1140 |
gres_cnt = job_gres_ptr->gres_cnt_node_alloc[node_offset]; |
|
|
1141 |
log_flag(GRES, "%s:%d: Job has gres_cnt_node_alloc gres_cnt:%"PRIu64, __func__, __LINE__, gres_cnt); |
| 1136 |
} else { |
1142 |
} else { |
| 1137 |
gres_cnt = job_gres_ptr->gres_per_node; |
1143 |
gres_cnt = job_gres_ptr->gres_per_node; |
|
|
1144 |
log_flag(GRES, "%s:%d: It's an else case setting gres_cnt:%"PRIu64, __func__, __LINE__, gres_cnt); |
| 1138 |
} |
1145 |
} |
| 1139 |
if (gres_cnt && (node_gres_ptr->gres_cnt_alloc >= gres_cnt)) |
1146 |
if (gres_cnt && (node_gres_ptr->gres_cnt_alloc >= gres_cnt)) { |
| 1140 |
node_gres_ptr->gres_cnt_alloc -= gres_cnt; |
1147 |
node_gres_ptr->gres_cnt_alloc -= gres_cnt; |
| 1141 |
else if (gres_cnt) { |
1148 |
log_flag(GRES, "%s:%d: Removing gres_cnt from node_gres_ptr: %"PRIu64, __func__, __LINE__, node_gres_ptr->gres_cnt_alloc); |
|
|
1149 |
} else if (gres_cnt) { |
| 1142 |
error("gres/%s: job %u node %s GRES count underflow (%"PRIu64" < %"PRIu64")", |
1150 |
error("gres/%s: job %u node %s GRES count underflow (%"PRIu64" < %"PRIu64")", |
| 1143 |
gres_name, job_id, node_name, |
1151 |
gres_name, job_id, node_name, |
| 1144 |
node_gres_ptr->gres_cnt_alloc, gres_cnt); |
1152 |
node_gres_ptr->gres_cnt_alloc, gres_cnt); |
| 1145 |
node_gres_ptr->gres_cnt_alloc = 0; |
1153 |
node_gres_ptr->gres_cnt_alloc = 0; |
|
|
1154 |
} else { |
| 1155 |
log_flag(GRES, "%s:%d: This is a never happen case now node_gres_ptr->gres_cnt_alloc: %"PRIu64, __func__, __LINE__, node_gres_ptr->gres_cnt_alloc); |
| 1146 |
} |
1156 |
} |
| 1147 |
|
1157 |
|
| 1148 |
if (job_gres_ptr->gres_bit_alloc && |
1158 |
if (job_gres_ptr->gres_bit_alloc && |
| 1149 |
job_gres_ptr->gres_bit_alloc[node_offset] && |
1159 |
job_gres_ptr->gres_bit_alloc[node_offset] && |
| 1150 |
node_gres_ptr->topo_gres_bitmap && |
1160 |
node_gres_ptr->topo_gres_bitmap && |
| 1151 |
node_gres_ptr->topo_gres_cnt_alloc) { |
1161 |
node_gres_ptr->topo_gres_cnt_alloc) { |
|
|
1162 |
log_flag(GRES, "%s:%d: Job has gres_bit_alloc and node has a topo_gres_bitmap and topo_gres_cnt_alloc", __func__, __LINE__); |
| 1152 |
for (i = 0; i < node_gres_ptr->topo_cnt; i++) { |
1163 |
for (i = 0; i < node_gres_ptr->topo_cnt; i++) { |
| 1153 |
sz1 = bit_size( |
1164 |
sz1 = bit_size( |
| 1154 |
job_gres_ptr->gres_bit_alloc[node_offset]); |
1165 |
job_gres_ptr->gres_bit_alloc[node_offset]); |
|
Lines 1201-1206
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1201 |
} else if (job_gres_ptr->gres_bit_alloc && |
1212 |
} else if (job_gres_ptr->gres_bit_alloc && |
| 1202 |
job_gres_ptr->gres_bit_alloc[node_offset] && |
1213 |
job_gres_ptr->gres_bit_alloc[node_offset] && |
| 1203 |
node_gres_ptr->topo_gres_cnt_alloc) { |
1214 |
node_gres_ptr->topo_gres_cnt_alloc) { |
|
|
1215 |
log_flag(GRES, "%s:%d: Job has gres_bit_alloc and node has topo_gres_cnt_alloc", __func__, __LINE__); |
| 1204 |
/* Avoid crash if configuration inconsistent */ |
1216 |
/* Avoid crash if configuration inconsistent */ |
| 1205 |
len = MIN(node_gres_ptr->gres_cnt_config, |
1217 |
len = MIN(node_gres_ptr->gres_cnt_config, |
| 1206 |
bit_size(job_gres_ptr-> |
1218 |
bit_size(job_gres_ptr-> |
|
Lines 1208-1219
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1208 |
for (i = 0; i < len; i++) { |
1220 |
for (i = 0; i < len; i++) { |
| 1209 |
if (!bit_test(job_gres_ptr-> |
1221 |
if (!bit_test(job_gres_ptr-> |
| 1210 |
gres_bit_alloc[node_offset], i) || |
1222 |
gres_bit_alloc[node_offset], i) || |
| 1211 |
!node_gres_ptr->topo_gres_cnt_alloc[i]) |
1223 |
!node_gres_ptr->topo_gres_cnt_alloc[i]) { |
|
|
1224 |
log_flag(GRES, "%s:%d: job gres_bit_alloc at node offset not set or no topo_gres_cnt_alloc on node", __func__, __LINE__); |
| 1212 |
continue; |
1225 |
continue; |
|
|
1226 |
} |
| 1213 |
if (node_gres_ptr->topo_gres_cnt_alloc[i] >= |
1227 |
if (node_gres_ptr->topo_gres_cnt_alloc[i] >= |
| 1214 |
gres_per_bit) { |
1228 |
gres_per_bit) { |
| 1215 |
node_gres_ptr->topo_gres_cnt_alloc[i] -= |
1229 |
node_gres_ptr->topo_gres_cnt_alloc[i] -= |
| 1216 |
gres_per_bit; |
1230 |
gres_per_bit; |
|
|
1231 |
log_flag(GRES, "%s:%d: Removing gres_per_bit: %"PRIu64, __func__, __LINE__, gres_per_bit); |
| 1232 |
|
| 1217 |
} else { |
1233 |
} else { |
| 1218 |
error("gres/%s: job %u dealloc node %s " |
1234 |
error("gres/%s: job %u dealloc node %s " |
| 1219 |
"topo_gres_cnt_alloc[%d] count underflow " |
1235 |
"topo_gres_cnt_alloc[%d] count underflow " |
|
Lines 1225-1241
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1225 |
} |
1241 |
} |
| 1226 |
if ((node_gres_ptr->type_cnt == 0) || |
1242 |
if ((node_gres_ptr->type_cnt == 0) || |
| 1227 |
(node_gres_ptr->topo_type_name == NULL) || |
1243 |
(node_gres_ptr->topo_type_name == NULL) || |
| 1228 |
(node_gres_ptr->topo_type_name[i] == NULL)) |
1244 |
(node_gres_ptr->topo_type_name[i] == NULL)) { |
|
|
1245 |
log_flag(GRES, "%s:%d: node type_cnt:%d ", __func__, __LINE__, node_gres_ptr->type_cnt); |
| 1229 |
continue; |
1246 |
continue; |
|
|
1247 |
} |
| 1230 |
for (j = 0; j < node_gres_ptr->type_cnt; j++) { |
1248 |
for (j = 0; j < node_gres_ptr->type_cnt; j++) { |
| 1231 |
if (!node_gres_ptr->type_name[j] || |
1249 |
if (!node_gres_ptr->type_name[j] || |
| 1232 |
(node_gres_ptr->topo_type_id[i] != |
1250 |
(node_gres_ptr->topo_type_id[i] != |
| 1233 |
node_gres_ptr->type_id[j])) |
1251 |
node_gres_ptr->type_id[j])) { |
|
|
1252 |
log_flag(GRES, "%s:%d: no type_name or topo_type_id and type_id missmatch", __func__, __LINE__); |
| 1234 |
continue; |
1253 |
continue; |
|
|
1254 |
} |
| 1235 |
if (node_gres_ptr->type_cnt_alloc[j] >= |
1255 |
if (node_gres_ptr->type_cnt_alloc[j] >= |
| 1236 |
gres_per_bit) { |
1256 |
gres_per_bit) { |
| 1237 |
node_gres_ptr->type_cnt_alloc[j] -= |
1257 |
node_gres_ptr->type_cnt_alloc[j] -= |
| 1238 |
gres_per_bit; |
1258 |
gres_per_bit; |
|
|
1259 |
log_flag(GRES, "%s:%d: Deallocating gres_per_bit:%"PRIu64, __func__, __LINE__, gres_per_bit); |
| 1239 |
} else { |
1260 |
} else { |
| 1240 |
error("gres/%s: job %u dealloc node %s " |
1261 |
error("gres/%s: job %u dealloc node %s " |
| 1241 |
"type %s type_cnt_alloc count underflow " |
1262 |
"type %s type_cnt_alloc count underflow " |
|
Lines 1249-1269
static int _job_dealloc(void *job_gres_data, void *node_gres_data,
Link Here
|
| 1249 |
} |
1270 |
} |
| 1250 |
} |
1271 |
} |
| 1251 |
} else if (job_gres_ptr->type_name) { |
1272 |
} else if (job_gres_ptr->type_name) { |
|
|
1273 |
log_flag(GRES, "%s:%d: Job has a type_name:%s", __func__, __LINE__, job_gres_ptr->type_name); |
| 1252 |
for (j = 0; j < node_gres_ptr->type_cnt; j++) { |
1274 |
for (j = 0; j < node_gres_ptr->type_cnt; j++) { |
| 1253 |
if (job_gres_ptr->type_id != |
1275 |
if (job_gres_ptr->type_id != |
| 1254 |
node_gres_ptr->type_id[j]) |
1276 |
node_gres_ptr->type_id[j]) { |
|
|
1277 |
log_flag(GRES, "%s:%d: Job and node GRES type differ job:%d node:%d", __func__, __LINE__, job_gres_ptr->type_id, node_gres_ptr->type_id[j]); |
| 1255 |
continue; |
1278 |
continue; |
|
|
1279 |
} |
| 1256 |
k = MIN(gres_cnt, node_gres_ptr->type_cnt_alloc[j]); |
1280 |
k = MIN(gres_cnt, node_gres_ptr->type_cnt_alloc[j]); |
| 1257 |
node_gres_ptr->type_cnt_alloc[j] -= k; |
1281 |
node_gres_ptr->type_cnt_alloc[j] -= k; |
| 1258 |
gres_cnt -= k; |
1282 |
gres_cnt -= k; |
| 1259 |
if (gres_cnt == 0) |
1283 |
if (gres_cnt == 0) |
| 1260 |
break; |
1284 |
break; |
| 1261 |
} |
1285 |
} |
| 1262 |
} |
1286 |
} else { |
|
|
1287 |
log_flag(GRES, "%s:%d: This is a never happen case in todays code, gres_cnt:%"PRIu64, __func__, __LINE__, gres_cnt); |
| 1288 |
} |
| 1289 |
|
| 1263 |
|
1290 |
|
| 1264 |
if (!resize) |
1291 |
if (!resize) |
| 1265 |
return SLURM_SUCCESS; |
1292 |
return SLURM_SUCCESS; |
| 1266 |
|
1293 |
|
|
|
1294 |
|
| 1295 |
log_flag(GRES, "%s:%d: This is job resize!", __func__, __LINE__); |
| 1296 |
|
| 1267 |
xassert(job_gres_ptr->node_cnt >= 1); |
1297 |
xassert(job_gres_ptr->node_cnt >= 1); |
| 1268 |
|
1298 |
|
| 1269 |
/* |
1299 |
/* |
| 1270 |
- |
|
|