-
Notifications
You must be signed in to change notification settings - Fork 9
/
quo.h
626 lines (586 loc) · 18.1 KB
/
quo.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
/*
* Copyright (c) 2013-2024 Triad National Security, LLC
* All rights reserved.
*
* This file is part of the libquo project. See the LICENSE file at the
* top-level directory of this distribution.
*/
/**
* @file quo.h
*/
/* I do a pretty terrible job explaining the interface. Play around with the
* demo codes, as they are simple and pretty clearly illustrate how to use QUO.
*/
#ifndef QUO_H_INCLUDED
#define QUO_H_INCLUDED
/* For MPI_Comm type */
#include "mpi.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Convenience definition (in case you need this); always defined to 1. */
#define LIBQUO 1
/**
 * Indicates that this header versions its API (see QUO_API_VERSION).
 * New in v1.4.0. Prior versions didn't have this defined, since the API was
 * not versioned in this way, so check for it with #ifdef before relying on
 * QUO_API_VERSION.
 */
#define QUO_HAS_API_VERSIONING 1
/**
 * API version number. It is updated to (X<<16)+(Y<<8)+Z when a release X.Y.Z
 * modifies the API, so the current value 0x00010400 encodes 1.4.0.
 * This is new in v1.4.0, since earlier releases did not version the API in
 * this way.
 */
#define QUO_API_VERSION 0x00010400
/**
 * Opaque QUO context. Only forward-declared here, so the structure's layout
 * is not visible to users of this header.
 */
struct QUO_t;
/** Convenience typedef so the context can be named without the struct tag. */
typedef struct QUO_t QUO_t;
/**
 * External QUO context type: a pointer to the opaque QUO_t. This is the handle
 * type taken by the context-based routines declared in this header.
 */
typedef QUO_t * QUO_context;
/**
 * Return codes produced by QUO routines.
 *
 * Severity terms used below:
 * - fatal     = libquo can no longer function.
 * - not fatal = an error occurred, but libquo can continue functioning.
 */
enum {
    /** The operation succeeded. */
    QUO_SUCCESS = 0,
    /** The operation succeeded, but it had already been done. */
    QUO_SUCCESS_ALREADY_DONE = 1,
    /** General error -- fatal. */
    QUO_ERR = 2,
    /** System error -- fatal. */
    QUO_ERR_SYS = 3,
    /** Out of resources error -- fatal. */
    QUO_ERR_OOR = 4,
    /** Invalid argument provided to the library -- usually fatal. */
    QUO_ERR_INVLD_ARG = 5,
    /** Library call made before QUO_init was called -- improper use of library. */
    QUO_ERR_CALL_BEFORE_INIT = 6,
    /** Topology error -- fatal. */
    QUO_ERR_TOPO = 7,
    /** MPI error -- fatal. */
    QUO_ERR_MPI = 8,
    /** Requested action is not supported -- usually not fatal. */
    QUO_ERR_NOT_SUPPORTED = 9,
    /** Pop error -- not fatal, but usually indicates improper use. */
    QUO_ERR_POP = 10,
    /** The thing that you were looking for wasn't found -- not fatal. */
    QUO_ERR_NOT_FOUND = 11
};
/**
 * Hardware resource types that can be named in queries and binding requests.
 */
typedef enum {
    /** The machine. */
    QUO_OBJ_MACHINE = 0,
    /** NUMA node. */
    QUO_OBJ_NUMANODE = 1,
    /** Package. */
    QUO_OBJ_PACKAGE = 2,
    /**
     * Socket (synonym for QUO_OBJ_PACKAGE). Note that it is a separate
     * enumerator with its own value, not an alias of QUO_OBJ_PACKAGE.
     */
    QUO_OBJ_SOCKET = 3,
    /** Core. */
    QUO_OBJ_CORE = 4,
    /** Processing unit (e.g. hardware thread). */
    QUO_OBJ_PU = 5
} QUO_obj_type_t;
/**
 * Policies that select how QUO_bind_push interprets its arguments.
 */
typedef enum {
    /** Push exactly the binding policy that was provided. */
    QUO_BIND_PUSH_PROVIDED = 0,
    /** Push to the enclosing object of the provided QUO_obj_type_t. */
    QUO_BIND_PUSH_OBJ = 1
} QUO_bind_push_policy_t;
/**
 * Context-specific flags that influence how QUO behaves. They are passed to
 * QUO_create_with_flags.
 */
typedef enum {
    /** Disable multi-threading (hyper-threading) in software. */
    QUO_CREATE_NO_MT = 0x1
} QUO_create_flags_t;
/* ////////////////////////////////////////////////////////////////////////// */
/* ////////////////////////////////////////////////////////////////////////// */
/* QUO API */
/* ////////////////////////////////////////////////////////////////////////// */
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Version query routine.
 *
 * @param[out] version Major version.
 *
 * @param[out] subversion Subversion.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * This routine takes no QUO_context and can be called before the library is
 * initialized. The original note names QUO_init, which this header does not
 * declare; presumably QUO_create is meant -- TODO confirm.
 *
 * \code{.c}
 * int major = 0, minor = 0;
 * if (QUO_SUCCESS != QUO_version(&major, &minor)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_version(int *version,
int *subversion);
/**
 * Context handle construction and initialization routine.
 *
 * @param[out] q Reference to a new QUO_context.
 *
 * @param[in] comm Initializing MPI communicator.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * This is typically the first "real" call into the library. A relatively
 * expensive routine that must be called AFTER MPI_Init. Call QUO_free to free
 * returned resources.
 *
 * \code{.c}
 * QUO_context quo = NULL;
 * if (QUO_SUCCESS != QUO_create(&quo, MPI_COMM_WORLD)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_create(QUO_context *q,
MPI_Comm comm);
/**
 * Same as QUO_create(), but allows users to add flags that influence how QUO
 * behaves.
 *
 * @param[out] q Reference to a new QUO_context.
 *
 * @param[in] comm Initializing MPI communicator.
 *
 * @param[in] flags Context-specific flags (see QUO_create_flags_t), for
 *                  example QUO_CREATE_NO_MT.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * QUO_context quo = NULL;
 * if (QUO_SUCCESS != QUO_create_with_flags(&quo, MPI_COMM_WORLD,
 *                                          QUO_CREATE_NO_MT)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_create_with_flags(QUO_context *q,
MPI_Comm comm,
QUO_create_flags_t flags);
/**
 * Context handle destruction routine.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * This is typically the last "real" call into the library. A relatively
 * inexpensive routine that must be called BEFORE MPI_Finalize. Once a call to
 * this routine is made, it is an error to use any libquo services associated
 * with the freed libquo context from any other participating process.
 *
 * \code{.c}
 * if (QUO_SUCCESS != QUO_free(quo)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_free(QUO_context q);
/**
 * Context query routine that returns the total number of hardware
 * resource objects that are on the caller's system.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] target_type Hardware object type that is being queried.
 *
 * @param[out] out_nobjs Total number of hardware objects of type target_type
 *                       found on the system.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * int nsockets = 0;
 * if (QUO_SUCCESS != QUO_nobjs_by_type(q, QUO_OBJ_SOCKET, &nsockets)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_nobjs_by_type(QUO_context q,
QUO_obj_type_t target_type,
int *out_nobjs);
/**
 * Context query routine that returns the total number of hardware
 * resource objects that are in another hardware resource (e.g. cores in a
 * socket).
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] in_type Container hardware object type.
 *
 * @param[in] in_type_index in_type's ID (base 0).
 *
 * @param[in] type Target hardware object found in in_type[in_type_index].
 *
 * @param[out] out_result Total number of hardware objects of the target type
 *                        found by the query.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * int ncores_in_first_socket = 0;
 * if (QUO_SUCCESS != QUO_nobjs_in_type_by_type(q, QUO_OBJ_SOCKET, 0,
 *                                              QUO_OBJ_CORE,
 *                                              &ncores_in_first_socket)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_nobjs_in_type_by_type(QUO_context q,
QUO_obj_type_t in_type,
int in_type_index,
QUO_obj_type_t type,
int *out_result);
/**
 * Context handle query routine that returns whether or not the caller's
 * current binding policy falls within a particular system hardware resource
 * (is enclosed).
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] type Hardware object type.
 *
 * @param[in] in_type_index type's ID (base 0).
 *
 * @param[out] out_result Flag indicating whether or not the caller's current
 *                        binding policy falls within type[in_type_index].
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * // index 2 names the third socket, since indices are base 0 //
 * int cur_bind_enclosed_in_sock3 = 0;
 * if (QUO_SUCCESS != QUO_cpuset_in_type(q, QUO_OBJ_SOCKET, 2,
 *                                       &cur_bind_enclosed_in_sock3)) {
 *     // error handling //
 * }
 * if (cur_bind_enclosed_in_sock3) {
 *     // do stuff //
 * }
 * \endcode
 */
int
QUO_cpuset_in_type(QUO_context q,
QUO_obj_type_t type,
int in_type_index,
int *out_result);
/**
 * Similar to QUO_cpuset_in_type, but returns the "SMP_COMM_WORLD" QUO IDs that
 * met the query criteria.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] type Hardware object type.
 *
 * @param[in] in_type_index type's ID (base 0).
 *
 * @param[out] out_nqids Total number of node (job) processes that satisfy the
 *                       query criteria.
 *
 * @param[out] out_qids An array of "SMP_COMM_WORLD ranks" that met the query
 *                      criteria. Ownership passes to the caller: *out_qids
 *                      must be freed by a call to free(3).
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * int nqids_enclosed_in_socket0 = 0;
 * int *qids_enclosed_in_socket0 = NULL;
 * if (QUO_SUCCESS != QUO_qids_in_type(q, QUO_OBJ_SOCKET, 0,
 *                                     &nqids_enclosed_in_socket0,
 *                                     &qids_enclosed_in_socket0)) {
 *     // error handling //
 * }
 * free(qids_enclosed_in_socket0);
 * \endcode
 */
int
QUO_qids_in_type(QUO_context q,
QUO_obj_type_t type,
int in_type_index,
int *out_nqids,
int **out_qids);
/**
 * Query routine that returns the total number of NUMA nodes that are
 * present on the caller's system.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_nnumanodes Total number of NUMA nodes on the system.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * int nnumanodes = 0;
 * if (QUO_SUCCESS != QUO_nnumanodes(q, &nnumanodes)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_nnumanodes(QUO_context q,
int *out_nnumanodes);
/**
 * Similar to QUO_nnumanodes, but returns the total number of sockets present on
 * the caller's system.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_nsockets Total number of sockets on the system.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 */
int
QUO_nsockets(QUO_context q,
int *out_nsockets);
/**
 * Similar to QUO_nnumanodes, but returns the total number of cores present on
 * the caller's system.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_ncores Total number of cores on the system.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 */
int
QUO_ncores(QUO_context q,
int *out_ncores);
/**
 * Similar to QUO_nnumanodes, but returns the total number of processing units
 * (PUs) (e.g., hardware threads) present on the caller's system.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_npus Total number of processing units on the system.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 */
int
QUO_npus(QUO_context q,
int *out_npus);
/**
 * Similar to QUO_nnumanodes, but returns the total number of compute nodes
 * (i.e., servers) in the current job.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_nodes Total number of compute nodes in the current job.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 */
int
QUO_nnodes(QUO_context q,
int *out_nodes);
/**
 * Similar to QUO_nnumanodes, but returns the total number of job processes that
 * are on the caller's compute node.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_nqids Total number of job processes on the caller's compute
 *                       node, including the caller.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * *out_nqids includes the caller. For example, if there are 3 MPI processes on
 * rank 0's (MPI_COMM_WORLD) node, then rank 0's call to this routine will
 * result in *out_nqids being set to 3.
 */
int
QUO_nqids(QUO_context q,
int *out_nqids);
/**
 * Query routine that returns the caller's compute node QUO node ID (QID).
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] out_qid The caller's node ID, as assigned by libquo.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * QIDs start at 0 and go to NNODERANKS - 1. NNODERANKS is not defined in this
 * header; presumably it is the number of job processes on the caller's node
 * (cf. QUO_nqids) -- TODO confirm.
 *
 * \code{.c}
 * int mynodeqid = 0;
 * if (QUO_SUCCESS != QUO_id(q, &mynodeqid)) {
 *     // error handling //
 * }
 * if (0 == mynodeqid) {
 *     // node id 0 do stuff //
 * }
 * \endcode
 */
int
QUO_id(QUO_context q,
int *out_qid);
/**
 * Query routine that returns whether or not the caller is currently
 * "bound" to a CPU resource.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] bound Flag indicating whether or not the caller is currently bound.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * If the caller's current cpuset is equal to the widest available
 * cpuset, then the caller is not bound as far as libquo is concerned. For
 * example, if your system has only one core and the calling process is "bound"
 * to that one core, then as far as libquo is concerned, the caller is not
 * bound.
 *
 * \code{.c}
 * int bound = 0;
 * if (QUO_SUCCESS != QUO_bound(q, &bound)) {
 *     // error handling //
 * }
 * if (!bound) {
 *     // take action //
 * }
 * \endcode
 */
int
QUO_bound(QUO_context q,
int *bound);
/**
 * Query routine that returns a string representation of the caller's
 * current binding policy (cpuset) in a hexadecimal format. @see CPUSET(7).
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[out] cbind_str The caller's current CPU binding policy in string form.
 *                       Ownership passes to the caller: *cbind_str must be
 *                       freed by a call to free(3).
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * char *cbindstr = NULL;
 * if (QUO_SUCCESS != QUO_stringify_cbind(q, &cbindstr)) {
 *     // error handling //
 * }
 * printf("%s\n", cbindstr);
 * free(cbindstr);
 * \endcode
 */
int
QUO_stringify_cbind(QUO_context q,
char **cbind_str);
/**
 * Routine that changes the caller's process binding policy. The policy
 * is maintained in the current context's stack.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] policy Policy that influences the behavior of this routine. If
 *                   QUO_BIND_PUSH_PROVIDED is provided, then the type and
 *                   obj_index are used as the new policy. If QUO_BIND_PUSH_OBJ
 *                   is provided, then obj_index is ignored and the "closest"
 *                   type is used.
 *
 * @param[in] type The hardware resource to bind to.
 *
 * @param[in] obj_index When not ignored, type's index (base 0).
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \note
 * To revert to the previous binding policy call QUO_bind_pop.
 *
 * \code{.c}
 * // EXAMPLE 1 //
 * // in this example we will bind to socket 0 //
 * if (QUO_SUCCESS != QUO_bind_push(q, QUO_BIND_PUSH_PROVIDED,
 *                                  QUO_OBJ_SOCKET, 0)) {
 *     // error handling //
 * }
 * // revert to previous process binding policy //
 * if (QUO_SUCCESS != QUO_bind_pop(q)) {
 *     // error handling //
 * }
 * // EXAMPLE 2 //
 * // in this example we will bind to the "closest" socket; //
 * // obj_index (-1 here) is ignored under QUO_BIND_PUSH_OBJ //
 * if (QUO_SUCCESS != QUO_bind_push(q, QUO_BIND_PUSH_OBJ,
 *                                  QUO_OBJ_SOCKET, -1)) {
 *     // error handling //
 * }
 * // revert to previous process binding policy //
 * if (QUO_SUCCESS != QUO_bind_pop(q)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_bind_push(QUO_context q,
QUO_bind_push_policy_t policy,
QUO_obj_type_t type,
int obj_index);
/**
 * Routine that changes the caller's process binding policy by replacing
 * it with the policy at the top of the provided context's process bind stack.
 * It is the counterpart of QUO_bind_push.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * // in this example we will bind to socket 0 //
 * if (QUO_SUCCESS != QUO_bind_push(q, QUO_BIND_PUSH_PROVIDED,
 *                                  QUO_OBJ_SOCKET, 0)) {
 *     // error handling //
 * }
 * // revert to previous process binding policy //
 * if (QUO_SUCCESS != QUO_bind_pop(q)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_bind_pop(QUO_context q);
/**
 * Routine that acts as a compute node barrier. All context-initializing
 * processes on a node MUST call this in order for everyone to proceed past the
 * barrier. See demos for examples.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * // time for p1 to do some work with some of the ranks //
 * if (working) {
 *     // *** do work *** //
 *     // signals completion //
 *     if (QUO_SUCCESS != QUO_barrier(q)) {
 *         // error handling //
 *     }
 * } else {
 *     // non workers wait in a barrier //
 *     if (QUO_SUCCESS != QUO_barrier(q)) {
 *         // error handling //
 *     }
 * }
 * \endcode
 */
int
QUO_barrier(QUO_context q);
/**
 * Routine that helps evenly distribute processes across hardware
 * resources. The total number of processes assigned to a particular resource
 * will not exceed max_qids_per_res_type.
 *
 * This routine doesn't modify the calling processes' affinities; it is a
 * helper for distributing processes over hardware resources given a global
 * view of all the affinities within a job. See the demo codes for worked
 * examples.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] distrib_over_this The target hardware resource on which processes
 *                              will be evenly distributed.
 *
 * @param[in] max_qids_per_res_type The maximum number of processes that will be
 *                                  assigned to the provided resources. For
 *                                  example, if your system has two sockets and
 *                                  max_qids_per_res_type is 2, then a max of 4
 *                                  processes will be chosen (max 2 per socket).
 *
 * @param[out] out_selected Flag indicating whether or not the caller was chosen
 *                          in the work distribution: 1 means the caller was
 *                          chosen, 0 otherwise.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * int res_assigned = 0;
 * if (QUO_SUCCESS != QUO_auto_distrib(q, QUO_OBJ_SOCKET,
 *                                     2, &res_assigned)) {
 *     // error handling //
 * }
 * \endcode
 */
int
QUO_auto_distrib(QUO_context q,
QUO_obj_type_t distrib_over_this,
int max_qids_per_res_type,
int *out_selected);
/**
 * Routine that returns an MPI communicator containing the processes that
 * match the requested hardware object type.
 *
 * @param[in] q Constructed and initialized QUO_context.
 *
 * @param[in] target_type Target hardware object type.
 *
 * @param[out] out_comm MPI_Comm_dup'd communicator containing processes that
 *                      match the target request. Returned resources must be
 *                      freed with a call to MPI_Comm_free.
 *
 * @retval QUO_SUCCESS if the operation completed successfully.
 *
 * \code{.c}
 * MPI_Comm sock_comm;
 * if (QUO_SUCCESS != QUO_get_mpi_comm_by_type(q, QUO_OBJ_SOCKET,
 *                                             &sock_comm)) {
 *     // error handling //
 * }
 * // ... use sock_comm ... //
 * MPI_Comm_free(&sock_comm);
 * \endcode
 */
int
QUO_get_mpi_comm_by_type(QUO_context q,
QUO_obj_type_t target_type,
MPI_Comm *out_comm);
#ifdef __cplusplus
}
#endif
#endif