-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_schema.yml
1004 lines (898 loc) · 41.5 KB
/
config_schema.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
# Schema file for DaCe Preferences
# Metadata fields for elements:
# type: any python type (dict, list, int, bool, float, str)
# title: short name to show in GUI
# description: tooltip to show in GUI
# required: required sub-fields (for dict fields)
# default: default value. Can be platform-specific (see below)
# default_<platformname>: default value for platform <platformname> (overrides default)
# template_vars: template variables to include when processing (str fields only)
# Top-level element is a dictionary (record)
type: dict
title: General
description: DaCe Preferences
required:
#############################################
# Categories
optimizer:
type: dict
title: Optimizer
description: Preferences of the SDFG Optimizer
required:
autospecialize:
type: bool
default: false
title: Auto-specialize symbols
description: >
Automatically specialize every SDFG to the symbol values
at call-time. Requires all symbols to be set.
autooptimize:
type: bool
default: false
title: Run auto-optimization heuristics
description: >
Automatically runs the set of optimizing transformation
heuristics on any program called via the Python frontend.
autotile_size:
type: int
default: 128
title: Default tile size in auto-optimization
description: >
Sets the default tile size for the optimization heuristics.
autotile_partial_parallelism:
type: bool
default: true
title: Prefer partial parallelism over write-conflict tiling
description: >
If true, sets the auto-optimizer to prefer extracting map
parallel dimensions over tiling for atomic write-conflict
resolution edges. This may be slower in case of small
parallel dimensions vs. conflicted dimensions. This
preference only applies to symbolic ranges or ranges over
the autotile_size parameter.
transform_on_call:
type: bool
default: false
title: Transform SDFGs on Invocation
description: >
Automatically calls sdfg.optimize() every time a DaCe
program or SDFG is invoked.
interface:
type: str
default: dace.transformation.optimizer.SDFGOptimizer
title: SDFG Optimizer
description: >
SDFG optimization class to import and call on
sdfg.optimize(). Defaults to the transformation CLI;
an empty string or an invalid class name skips optimization.
visualize_sdfv:
type: bool
default: false
title: Visualize SDFG
description: Open SDFV in the browser after every transformation.
save_intermediate:
type: bool
default: false
title: Save intermediate SDFGs
description: Save SDFG files after every transformation.
automatic_simplification:
type: bool
default: true
title: Automatic SDFG simplification
description: >
Automatically performs SDFG simplification on programs.
detect_control_flow:
type: bool
default: true
title: Detect control flow from state transitions
description: >
Attempts to infer control flow constructs "if",
"for" and "while" from state transitions, allowing
code generators to generate appropriate code.
symbolic_positive:
type: bool
default: true
title: Treat all symbolic expressions as positive
description: >
Every expression in which a symbolic value appears
is treated as strictly positive. This is necessary
for certain Range evaluations using Subgraph Fusion.
match_exception:
type: bool
default: false
title: Treat exceptions in "can_be_applied" as errors
description: >
When an exception is raised in a transformation "can_be_applied"
function, if True the exception is raised further. Otherwise
the exception is printed as a warning.
compiler:
type: dict
title: Compiler
description: Preferences of the compiler
required:
use_cache:
type: bool
default: false
title: Use cache
description: >
If enabled, does not recompile code generated from SDFGs
if shared library (.so/.dll) file is present.
library_prefix:
type: str
default: ""
default_Linux: "lib"
default_Darwin: "lib"
title: Library prefix
description: Filename prefix for shared libraries.
library_extension:
type: str
default: so
default_Linux: so
default_Windows: dll
default_Darwin: dylib
title: Library extension
description: File extension of shared libraries.
indentation_spaces:
type: int
default: 4
title: Indentation width
description: >
Number of spaces used when indenting generated code.
build_type:
type: str
default: RelWithDebInfo
title: Build configuration
description: >
Configuration type for CMake build (can be Debug, Release,
RelWithDebInfo, or MinSizeRel).
allow_shadowing:
type: bool
default: true
title: Allow variable shadowing
description: >
Allow shadowing of variables in the code (reduces
exceptions to warnings when shadowing is encountered).
codegen_lineinfo:
type: bool
default: false
title: Annotate code generator lines
description: >
Keep a source mapping between generated code and the file/line
of the code generator that generated it. Used for debugging
code generation.
default_data_types:
type: str
default: Python
title: Default data types
description: >
Specify the default data types to use in generating code.
If "Python", Python's semantics will be followed (i.e., `float` and `int`
are represented using 64 bits). If the property is set to "C", C's semantics will be
used (`float` and `int` are represented using 32 bits).
unique_functions:
type: str
default: hash
title: Generate unique functions
description: >
Determine if and how to generate the code for equivalent NestedSDFGs:
"hash": hashing is used to determine if multiple NestedSDFGs with equivalent contents exist.
If this is the case, the code is generated only once.
"unique_name": the unique_name property of SDFG is used to determine if two NestedSDFGs are equal,
generating the code only once. This gives more control to the programmer, who can explicitly
decide what NestedSDFG code can be replicated and what not.
"none": a separate function is generated for each NestedSDFG
allow_view_arguments:
type: bool
default: false
title: Allow numpy views as arguments
description: >
If true, allows users to call DaCe programs with NumPy views
(for example, "A[:, 1]" or "w.T"). As this can create pointer
aliasing issues with two arrays pointing to the same memory,
or analyzability issues with strides and alignment, this option
is disabled by default.
inline_sdfgs:
type: bool
default: false
title: Inline all nested SDFGs
description: >
If set to true, inlines all nested SDFGs upon code generation by default.
max_stack_array_size:
type: int
default: 65536
title: Max stack-allocated array size (bytes)
description: >
All stack allocated arrays (i.e. StorageType.Register) with
size larger than this will be allocated on the heap.
#############################################
# CPU compiler
cpu:
type: dict
title: CPU
description: CPU compiler preferences
required:
executable:
type: str
default: ''
title: Compiler executable override
description: File path or name of compiler executable
args:
type: str
title: Arguments
description: Compiler argument flags
default: '-std=c++14 -fPIC -Wall -Wextra -O3 -march=native -ffast-math -Wno-unused-parameter -Wno-unused-label'
default_Windows: '/O2 /fp:fast /arch:AVX2 /D_USRDLL /D_WINDLL /D__restrict__=__restrict'
libs:
type: str
title: Additional libraries
description: Additional linked libraries required by target
default: ''
openmp_sections:
type: bool
default: true
title: Use OpenMP sections
description: >
If set to true, multiple connected components will
generate "#pragma omp parallel sections" code around
them.
#############################################
# GPU (CUDA/HIP) compiler
cuda:
type: dict
title: GPU
description: GPU (CUDA/HIP) compiler preferences
required:
backend:
type: str
default: 'cuda'
title: Compilation backend
description: >
Backend to compile for ('cuda' for NVIDIA or 'hip' for AMD).
path:
type: str
default: ''
title: CUDA/HIP path override
description: Path to CUDA toolkit or ROCm/HIP root directory
args:
type: str
title: nvcc Arguments
description: Compiler argument flags for CUDA
default: '-std=c++14 -Xcompiler -fPIC -O3 -Xcompiler -march=native --use_fast_math -Xcompiler -Wno-unused-parameter'
default_Windows: '-std=c++14 -O3 --use_fast_math'
hip_args:
type: str
title: hipcc Arguments
description: Compiler argument flags for HIP
default: '-std=c++17 -fPIC -O3 -ffast-math -Wno-unused-parameter'
cuda_arch:
type: str
title: Additional CUDA architectures
description: >
Additional CUDA architectures (separated by commas)
to compile GPU code for, excluding the current
architecture on the compiling machine.
default: '35'
hip_arch:
type: str
title: Additional HIP architectures
description: >
Additional HIP architectures (separated by commas)
to compile GPU code for, excluding the current
architecture on the compiling machine.
default: '906'
default_block_size:
type: str
title: Default thread-block size
description: >
Default thread-block size for GPU kernels when
explicit GPU block maps are not defined.
Can be set to 'max' to maximize occupancy.
default: '32,1,1'
dynamic_map_block_size:
type: str
title: Thread-Block size for GPU_ThreadBlock_Dynamic
description: >
Thread-Block size for maps using GPU_ThreadBlock_Dynamic
scheduler. Can be set to 'max' to maximize occupancy.
default: '128,1,1'
dynamic_map_fine_grained:
type: bool
title: Enable fine grained load balancing for GPU_ThreadBlock_Dynamic
description: >
If true the scheduler will dynamically redistribute the
combined work of all threads in the warp equally across the
warp (fine grained). Otherwise, each warp works sequentially
only on its tasks (potential load imbalance).
default: true
persistent_map_SM_fraction:
type: float
title: Fraction of SMs to use for persistent GPU map
description: >
Sets the fraction of the number of SMs of the Device
that the GPU_Persistent map can use. Together with
persistent_map_occupancy this specifies the grid
size of the kernel being launched.
0.0 < persistent_map_SM_fraction <= 1.0
The fraction will be rounded up to the next integer
number of SMs. The max value of SMs that can/will
be used is equal to cudaDevAttrMultiProcessorCount.
default: 1.0
persistent_map_occupancy:
type: int
title: Number of blocks to launch per SM used
description: >
Sets the number of thread blocks to be launched per
SM being used. Essentially this is a simple
multiplier to persistent_map_SM_fraction.
It is up to the user to check if the resulting
number of thread blocks can run efficiently on the
GPU.
default: 2
max_concurrent_streams:
type: int
title: Concurrent execution streams
description: >
Maximum number of concurrent CUDA/HIP streams to
generate. Special values: -1 only uses the
default stream, 0 uses infinite concurrent streams.
default: 0
syncdebug:
type: bool
title: Synchronous Debugging
description: >
Enables Synchronous Debugging mode, where each library call
is followed by full-device synchronization and error checking.
default: false
libs:
type: str
title: Additional libraries
description: Additional linked libraries required by target
default: ''
#############################################
# General FPGA flags
fpga:
type: dict
title: FPGA
description: "Common preferences for FPGA compilation."
required:
autobuild_bitstreams:
type: bool
default: true
title: Automatically build bitstreams
description: >
If set to true, CMake will automatically build missing
bitstreams when running an FPGA program. This can take a
very long time, and users might want to do this manually.
If set to false, the program will optimistically assume
that the bitstream is present in the build directory, and
will crash if this is not the case.
minimum_fifo_depth:
type: int
default: ''
title: Minimum depth of FIFOs
description: Sets the minimum depth of any generated FIFO.
vendor:
type: str
default: xilinx
title: FPGA vendor
description: >
Target Xilinx ("xilinx") or Intel ("intel_fpga") FPGAs when
generating code.
#############################################
# FPGA (Xilinx) compiler flags
xilinx:
type: dict
title: Xilinx
description: FPGA (Xilinx) compiler preferences
required:
mode:
type: str
default: simulation
title: Compilation mode
description: Target of FPGA kernel build (simulation/software_emulation/hardware_emulation/hardware)
path:
type: str
default: ''
title: Vitis installation override
description: >
Path to specific Vitis/SDx/SDAccel installation to
use instead of just searching PATH and environment
variables.
platform:
type: str
default: xilinx_u250_xdma_201830_2
title: Target platform for Xilinx
description: Platform name of Vitis/SDx/SDAccel target.
frequency:
type: str
default: ''
title: Target frequency for Xilinx kernels
description: >
Target frequency, in MHz, when compiling kernels
for Xilinx. Will not necessarily be achieved in
practice. To enable multiple clocks, enter values
in the format "clock_id:frequency", with frequency
being specified in MHz separated by an escaped bar,
all enclosed in quotes. E.g. "0:250\|1:500".
enable_debugging:
type: bool
default: false
title: Enable debugging for hardware kernels
description: >
Injects debugging cores on the interfaces of the
kernel, allowing fine-grained debugging of hardware
runs at the cost of additional resources. This is
always enabled for emulation runs.
host_flags:
type: str
title: Host arguments
description: Extra host compiler argument flags
default: "-Wno-unknown-pragmas -Wno-unused-label"
synthesis_flags:
type: str
title: Synthesis arguments
description: High-level synthesis C++ flags
default: "-std=c++11"
build_flags:
type: str
title: Arguments
description: Kernel build C++ flags
default: ""
decouple_array_interfaces:
type: bool
default: false
title: Decouple array memory interfaces
description: >
If an array is both read and written, this option decouples
its accesses by creating a memory interface for reading and one
for writing.
Note that this may hide potential Read-After-Write or
Write-After-Read dependencies.
#############################################
# Intel FPGA compiler flags
intel_fpga:
type: dict
title: Intel FPGA
description: Intel FPGA compiler preferences.
required:
mode:
type: str
default: emulator
title: Compilation mode
description: >
Target of FPGA kernel build
(emulator/simulator/hardware).
path:
type: str
default: ''
title: Intel FPGA OpenCL SDK installation override
description: >
Path to specific Intel FPGA OpenCL SDK installation
to use instead of just searching PATH and
environment variables.
board:
type: str
default: a10gx
title: Target FPGA board
enable_debugging:
type: bool
default: false
title: Enable debugging for hardware kernels
description: Injects debugging cores where available.
host_flags:
type: str
title: Host arguments
description: Extra host compiler argument flags
default: "-Wno-unknown-pragmas"
kernel_flags:
type: str
title: Kernel flags
description: High-level synthesis C++ flags
default: "-fp-relaxed -cl-no-signed-zeros -cl-fast-relaxed-math -cl-single-precision-constant -no-interleaving=default"
#############################################
# RTL (SystemVerilog) compiler
rtl:
type: dict
title: RTL
description: RTL (SystemVerilog) compiler preferences
required:
verbose:
type: bool
default: false
title: Verbose Build & Execution Output
description: Output full build and execution (incl internal state) log.
verilator_flags:
type: str
default: ''
title: Additional Verilator Arguments
description: Additional arguments fed to Verilator.
verilator_lint_warnings:
type: bool
default: true
title: Verilator Lint Warnings
description: Enable/Disable detailed SV lint checker output.
verilator_enable_debug:
type: bool
default: false
title: Verilator Enable Debug
description: Enable/disable verbose internal state debug output.
#############################################
# MPI compiler
mpi:
type: dict
title: MPI
description: MPI compiler preferences
required:
executable:
type: str
default: ''
title: Compiler executable override
description: File path or name of compiler executable
#############################################
# Linker
linker:
type: dict
title: Linker
description: Linker preferences
required:
executable:
type: str
default: ''
title: Linker executable override
description: File path or name of linker executable
args:
type: str
title: Arguments
description: Linker argument flags
# Tell linker to use rpath instead of runpath. Intel
# FPGA programs fail to find certain libraries at
# runtime with runpath.
default: '-Wl,--disable-new-dtags'
default_Darwin: ''
default_Windows: ''
execution:
type: dict
title: Execution
description: Binary execution preferences
required:
general:
type: dict
title: General
description: General execution preferences
required:
host:
type: str
default: localhost
title: Host
description: Hostname to use for execution
workdir:
type: str
default: '/tmp/'
title: Working directory
description: Working directory on the remote host
check_args:
type: bool
default: true
title: Check arguments
description: >
Do strict verification that arguments passed when
calling a DaCe program match the expected dtypes.
execcmd:
type: str
title: Command
description: >
Command to use to execute ${command} on ${host}
default: 'ssh ${host} ${command}'
template_vars:
- host
- command
copycmd_r2l:
type: str
default: 'scp ${host}:${srcfile} ${dstfile}'
title: "Remote->Local copy command"
description: >
Command to use to copy ${srcfile} on ${host} to
the local ${dstfile}.
template_vars:
- host
- srcfile
- dstfile
copycmd_l2r:
type: str
default: "scp ${srcfile} ${host}:${dstfile}"
title: "Local->Remote copy command"
description: >
Command to use to copy the local ${srcfile} to the
remote ${dstfile}.
template_vars:
- host
- srcfile
- dstfile
repetitions:
type: int
default: 5
title: "Repetitions per Run"
description: >
Number of repetitions to run for each click of the
Run button (median value will be reported in the
performance chart).
mpi:
type: dict
title: MPI
description: MPI execution preferences
required:
mpiexec:
type: str
default: 'mpirun -n ${num_procs} ${command}'
title: mpirun command
description: >
Command to use to execute MPI job ${command} with
${num_procs} processes.
template_vars:
- num_procs
- command
num_procs:
type: int
default: 4
title: Number of processes
description: Number of MPI processes to use
instrumentation:
type: dict
title: Instrumentation
description: Instrumentation preferences
required:
report_each_invocation:
type: bool
title: Save report for each invocation
default: true
description: >
Save an instrumentation report file for each invocation of
the SDFG, rather than one report that spans from SDFG
initialization to finalization.
papi:
type: dict
title: PAPI
description: PAPI configuration
required:
default_counters:
type: str
title: Default PAPI counters
default: "['PAPI_TOT_INS', 'PAPI_TOT_CYC', 'PAPI_L2_TCM', 'PAPI_L3_TCM']"
description: >
Sets the default PAPI counter list, formatted as
a Python list of strings.
overhead_compensation:
type: bool
title: Compensate Overhead
default: true
description: >
Subtracts the minimum measured overhead from every measurement.
vectorization_analysis:
type: bool
title: Enable vectorization check
default: false
description: >
Enables analysis of gcc vectorization information. Only gcc/g++ is supported.
print_fpga_runtime:
type: bool
default: false
title: Print FPGA runtime
description: Prints the runtime of instrumented FPGA kernel states to standard output.
#############################################
# Python frontend settings
frontend:
type: dict
title: Frontend
description: Python frontend preferences
required:
cache_size:
type: int
title: Program cache size
default: 32
description: >
The number of compiled programs to cache (based on argument
types, closure constants, and closure array types) to avoid
reparsing/compiling when calling a @dace.program or method.
implicit_recursion_depth:
type: int
title: Auto-parsing recursion depth
default: 64
description: >
The maximum call-stack depth allowed when automatically
parsing called dace functions or methods.
raise_nested_parsing_errors:
type: bool
title: Raise nested parsing errors
default: false
description: >
Raise all errors out of nested function parsing contexts
instead of trying to create a callback implicitly.
verbose_errors:
type: bool
title: Show preprocessed AST on parsing errors
default: false
description: >
Prints out the preprocessed unparsed AST in case of a parsing
error.
preprocessing_passes:
type: int
title: Number of preprocessing passes on Python code
default: 5
description: >
Number of times to run the Python preprocessing passes (e.g., constant
folding) on the input code. Set to zero to disable preprocessing
optimizations, set to -1 to run until the code has not changed.
dont_fuse_callbacks:
type: bool
title: Do not fuse callbacks
default: false
description: >
Stricter mode of operation where callbacks into Python don't participate
in state fusion transformations.
unroll_threshold:
type: int
title: Automatic unroll loop size threshold
default: -1
description: >
Threshold for automatic loop unrolling of any generator (e.g., including ``range``) with a
compile-time size. A value of -1 (default) means not to unroll any loop automatically, a value of
0 means unrolling every loop, and a value above zero sets a size threshold beyond which a
constant-sized loop will not be automatically unrolled.
#############################################
# General settings
debugprint:
type: bool
default: false
title: Debug printing
description: Enable verbose printouts.
progress:
type: bool
default: true
title: Progress reports
description: Enable progress report printouts.
cache:
type: str
default: name
title: Naming of cache entry
description: >
Determine the name of the generated dacecache folder.
"name" uses the name of the SDFG directly, causing it to be
overridden by other programs using the same SDFG name.
"hash" uses a mangled name based on the hash of the SDFG, such that
any change to the SDFG will generate a different cache folder.
"unique" uses a name based on the currently running Python process
at code generation time, such that no caching or clashes can happen
between different processes or subsequent invocations of Python.
"single" uses a single cache folder for all SDFGs, saving space and
potentially build time, but disallows executing SDFGs in parallel
and caching of more than one simultaneous SDFG.
store_history:
type: bool
default: true
title: Store SDFG transformation history
description: Store the history of transformations on the SDFG file.
default_build_folder:
type: str
default: .dacecache
title: Default SDFG build folder
description: >
Default folder in which compiled DaCe programs and SDFGs are stored.
Can either be a relative path (by default) or absolute.
profiling:
type: bool
default: false
title: Profiling
description: Enable profiling support.
profiling_status:
type: bool
default: true
title: Status bar for profiling
description: >
Enable tqdm status bar while profiling. If tqdm is not installed
a warning will appear. To disable this feature (and the warning) set
this option to false.
treps:
type: int
default: 100
title: Profiling Repetitions
description: Number of times to run program for profiling.
#############################################
# Experimental features
experimental:
type: dict
title: Experimental
description: Experimental features
required:
validate_undefs:
type: bool
default: false
title: Undefined Symbol Check
description: >
Check for undefined symbols in memlets during SDFG validation.
#############################################
# Features for unit testing
testing:
type: dict
title: Testing
description: Unit testing settings
required:
serialization:
type: bool
default: false
title: Test Serialization on validation
description: >
Before generating code, verify that a serialization/deserialization loop
generates the same SDFG.
deserialize_exception:
type: bool
default: false
title: Treat exceptions in deserialization as errors
description: >
When an exception is raised in a deserialization process (e.g., due to missing library node),
by default a warning is issued. If this setting is True, the exception will be raised as-is.
#############################################
# DaCe library settings
library:
type: dict
title: Library
description: Settings for handling the use of DaCe libraries.
required:
blas:
type: dict
title: BLAS
description: Built-in BLAS DaCe library.
required:
default_implementation:
type: str
default: pure
description: Default implementation of BLAS library nodes.
override:
type: bool
default: false
description: >
Force the default implementation, even if an
implementation has been explicitly set on a node.
fpga:
type: dict
title: FPGA
description: FPGA-specific BLAS options.
required:
default_stream_depth:
type: int
default: 32
title: Default FPGA stream depth
description: >
Default FPGA stream depth used in the BLAS
library nodes and the corresponding
streaming transformations
lapack:
type: dict
title: LAPACK
description: Built-in LAPACK DaCe library.
required:
default_implementation:
type: str
default: OpenBLAS
description: Default implementation of LAPACK library nodes.
override:
type: bool
default: false
description: >
Force the default implementation, even if an
implementation has been explicitly set on a node.
linalg:
type: dict
title: linalg
description: Built-in NumPy linalg DaCe library.
required:
default_implementation:
type: str
default: OpenBLAS
description: Default implementation of linalg library nodes.
override:
type: bool