View | Details | Raw Unified | Return to ticket 6385 | Differences between
and this patch

Collapse All | Expand All

(-)a/src/plugins/acct_gather_filesystem/lustre/acct_gather_filesystem_lustre.c (-30 / +77 lines)
Lines 116-151 static uint64_t debug_flags = 0; Link Here
116
static pthread_mutex_t lustre_lock = PTHREAD_MUTEX_INITIALIZER;
116
static pthread_mutex_t lustre_lock = PTHREAD_MUTEX_INITIALIZER;
117
static int tres_pos = -1;
117
static int tres_pos = -1;
118
118
119
/* Default path to lustre stats */
120
const char proc_base_path[] = "/proc/fs/lustre";
121
119
122
/**
120
/* _llite_path()
123
 *  is lustre fs supported
121
 *
124
 **/
122
 * returns the path to Lustre clients stats (depends on Lustre version)
123
 *
124
 */
125
static char *_llite_path(void)
126
{
127
	static char llite_path[PATH_MAX]; /* static storage: the returned pointer refers to this buffer, which every call overwrites */
128
	DIR *llite_dir;
129
130
	// first candidate: the llite directory under /proc/fs/lustre
131
	sprintf(llite_path, "/proc/fs/lustre/llite");
132
	llite_dir = opendir(llite_path);
133
134
	if (!llite_dir) {
135
		debug("%s: unable to open %s %m", __func__, llite_path);
136
137
		// second candidate: the llite directory under /sys/kernel/debug/lustre
138
		sprintf(llite_path, "/sys/kernel/debug/lustre/llite");
139
		llite_dir = opendir(llite_path);
140
		if (!llite_dir) {
141
			debug("%s: unable to open %s %m", __func__, llite_path);
142
143
			// neither candidate directory could be opened: report failure with NULL
144
			return NULL;
145
		}
146
	}
147
148
	closedir(llite_dir); /* the directory was opened only to check that it can be opened; nothing is read from it here */
149
	return llite_path; /* holds whichever candidate path was opened successfully */
150
}
151
152
153
/*
154
 * _check_lustre_fs()
155
 *
156
 * check if Lustre is supported
157
 *
158
 */
125
static int _check_lustre_fs(void)
159
static int _check_lustre_fs(void)
126
{
160
{
127
	static bool set = false;
161
	static bool set = false;
128
	static int rc = SLURM_SUCCESS;
162
	static int rc = SLURM_SUCCESS;
163
	static char* llite_path;
129
164
130
	if (!set) {
165
	if (!set) {
131
		uint32_t profile = 0;
166
		uint32_t profile = 0;
132
		char lustre_directory[BUFSIZ];
133
		DIR *proc_dir;
134
167
135
		set = true;
168
		set = true;
136
		acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING,
169
		acct_gather_profile_g_get(ACCT_GATHER_PROFILE_RUNNING,
137
					  &profile);
170
					  &profile);
138
		if ((profile & ACCT_GATHER_PROFILE_LUSTRE)) {
171
		if ((profile & ACCT_GATHER_PROFILE_LUSTRE)) {
139
			snprintf(lustre_directory, BUFSIZ,
172
			llite_path = _llite_path();
140
				 "%s/llite", proc_base_path);
173
			if (!llite_path) {
141
			proc_dir = opendir(proc_base_path);
174
				error("%s: can't find Lustre stats", __func__);
142
			if (!proc_dir) {
175
				rc = SLURM_ERROR;
143
				error("%s: not able to read %s %m",
144
				      __func__, lustre_directory);
145
				rc = SLURM_FAILURE;
146
			} else {
176
			} else {
147
				closedir(proc_dir);
177
				debug("%s: using Lustre stats in %s", __func__, llite_path);
178
				rc = SLURM_SUCCESS;
148
			}
179
			}
180
149
		} else
181
		} else
150
			rc = SLURM_ERROR;
182
			rc = SLURM_ERROR;
151
	}
183
	}
Lines 153-163 static int _check_lustre_fs(void) Link Here
153
	return rc;
185
	return rc;
154
}
186
}
155
187
188
156
/* _read_lustre_counters()
189
/* _read_lustre_counters()
190
 *
157
 * Read counters from all mounted lustre fs
191
 * Read counters from all mounted lustre fs
158
 * from the file stats under the directories:
192
 * from the file stats under the directories:
159
 *
193
 *
160
 * /proc/fs/lustre/llite/lustre-xxxx
194
 * /proc/fs/lustre/llite/lustre-xxxx
195
 *  or
196
 * /sys/kernel/debug/lustre/llite/lustre-xxxx
161
 *
197
 *
162
 * From the file stat we use 2 entries:
198
 * From the file stat we use 2 entries:
163
 *
199
 *
Lines 168-188 static int _check_lustre_fs(void) Link Here
168
static int _read_lustre_counters(void)
204
static int _read_lustre_counters(void)
169
{
205
{
170
	char lustre_dir[PATH_MAX];
206
	char lustre_dir[PATH_MAX];
171
	DIR *proc_dir;
207
	DIR *llite_dir;
172
	struct dirent *entry;
208
	struct dirent *entry;
173
	FILE *fff;
209
	FILE *fff;
174
	char buffer[BUFSIZ];
210
	char buffer[BUFSIZ];
211
	static char* llite_path;
175
212
213
	llite_path = _llite_path();
214
	if (!llite_path) {
215
		error("%s: can't find Lustre stats", __func__);
216
		return SLURM_ERROR;
217
	}
218
	debug("%s: using Lustre stats in %s", __func__, llite_path);
176
219
177
	snprintf(lustre_dir, PATH_MAX, "%s/llite", proc_base_path);
220
	snprintf(lustre_dir, PATH_MAX, llite_path);
178
221
179
	proc_dir = opendir(lustre_dir);
222
	llite_dir = opendir(lustre_dir);
180
	if (proc_dir == NULL) {
223
	if (llite_dir == NULL) {
181
		error("%s: Cannot open %s %m", __func__, lustre_dir);
224
		error("%s: Cannot open %s %m", __func__, lustre_dir);
182
		return SLURM_FAILURE;
225
		return SLURM_ERROR;
183
	}
226
	}
184
227
185
	while ((entry = readdir(proc_dir))) {
228
	while ((entry = readdir(llite_dir))) {
186
		char *path_stats = NULL;
229
		char *path_stats = NULL;
187
		bool bread;
230
		bool bread;
188
		bool bwrote;
231
		bool bwrote;
Lines 257-264 static int _read_lustre_counters(void) Link Here
257
		       __func__, lustre_se.all_lustre_nb_writes,
300
		       __func__, lustre_se.all_lustre_nb_writes,
258
		       lustre_se.all_lustre_nb_reads);
301
		       lustre_se.all_lustre_nb_reads);
259
302
260
	} /* while ((entry = readdir(proc_dir)))  */
303
	} /* while ((entry = readdir(llite_dir)))  */
261
	closedir(proc_dir);
304
	closedir(llite_dir);
262
305
263
	lustre_se.last_update_time = lustre_se.update_time;
306
	lustre_se.last_update_time = lustre_se.update_time;
264
	lustre_se.update_time = time(NULL);
307
	lustre_se.update_time = time(NULL);
Lines 269-277 static int _read_lustre_counters(void) Link Here
269
312
270
313
271
314
272
/*
315
/* _update_node_filesystem()
273
 * _thread_update_node_energy calls _read_ipmi_values and updates all values
316
 *
274
 * for node consumption
317
 * acct_gather_filesystem_p_node_update calls _update_node_filesystem and
318
 * updates all values for node Lustre usage
319
 *
275
 */
320
 */
276
static int _update_node_filesystem(void)
321
static int _update_node_filesystem(void)
277
{
322
{
Lines 306-312 static int _update_node_filesystem(void) Link Here
306
	if (_read_lustre_counters() != SLURM_SUCCESS) {
351
	if (_read_lustre_counters() != SLURM_SUCCESS) {
307
		error("%s: Cannot read lustre counters", __func__);
352
		error("%s: Cannot read lustre counters", __func__);
308
		slurm_mutex_unlock(&lustre_lock);
353
		slurm_mutex_unlock(&lustre_lock);
309
		return SLURM_FAILURE;
354
		return SLURM_ERROR;
310
	}
355
	}
311
356
312
	if (first) {
357
	if (first) {
Lines 387-392 extern int init(void) Link Here
387
{
432
{
388
	slurmdb_tres_rec_t tres_rec;
433
	slurmdb_tres_rec_t tres_rec;
389
434
435
	if (debug_flags & DEBUG_FLAG_FILESYSTEM)
436
		info("lustre: loaded");
437
390
	if (!_run_in_daemon())
438
	if (!_run_in_daemon())
391
		return SLURM_SUCCESS;
439
		return SLURM_SUCCESS;
392
440
Lines 406-412 extern int fini(void) Link Here
406
		return SLURM_SUCCESS;
454
		return SLURM_SUCCESS;
407
455
408
	if (debug_flags & DEBUG_FLAG_FILESYSTEM)
456
	if (debug_flags & DEBUG_FLAG_FILESYSTEM)
409
		info("lustre: ended");
457
		info("lustre: unloaded");
410
458
411
	return SLURM_SUCCESS;
459
	return SLURM_SUCCESS;
412
}
460
}
Lines 454-460 extern int acct_gather_filesystem_p_get_data(acct_gather_data_t *data) Link Here
454
	if (_read_lustre_counters() != SLURM_SUCCESS) {
502
	if (_read_lustre_counters() != SLURM_SUCCESS) {
455
		error("%s: Cannot read lustre counters", __func__);
503
		error("%s: Cannot read lustre counters", __func__);
456
		slurm_mutex_unlock(&lustre_lock);
504
		slurm_mutex_unlock(&lustre_lock);
457
		return SLURM_FAILURE;
505
		return SLURM_ERROR;
458
	}
506
	}
459
507
460
	/* Obtain the current values read from all lustre-xxxx directories */
508
	/* Obtain the current values read from all lustre-xxxx directories */
461
- 

Return to ticket 6385